From e05eb5fda5d1dfeef168b6fc071b20fdbcce2dcd Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 28 Nov 2021 18:18:45 +0100 Subject: [PATCH] Code Review: Parser, I can't let you do this --- Cargo.lock | 420 +---------------- Cargo.toml | 6 - benches/timed.rs | 98 ---- src/parse/incremental.rs | 975 ++++++++++++++++++--------------------- src/parse/mod.rs | 10 +- src/parse/parser.rs | 14 +- src/parse/tokens.rs | 28 +- src/source.rs | 16 +- src/syntax/mod.rs | 50 +- 9 files changed, 491 insertions(+), 1126 deletions(-) delete mode 100644 benches/timed.rs diff --git a/Cargo.lock b/Cargo.lock index 98cb4d81b..df3bf74a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -38,17 +38,6 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" -[[package]] -name = "atty" -version = "0.2.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" -dependencies = [ - "hermit-abi", - "libc", - "winapi", -] - [[package]] name = "autocfg" version = "1.0.1" @@ -67,24 +56,6 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" -[[package]] -name = "bstr" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223" -dependencies = [ - "lazy_static", - "memchr", - "regex-automata", - "serde", -] - -[[package]] -name = "bumpalo" -version = "3.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1e260c3a9040a7c19a12468758f4c16f31a81a1fe087482be9570ec864bb6c" - [[package]] name = "bytemuck" version = "1.7.3" @@ -97,32 +68,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" -[[package]] -name = "cast" -version = "0.2.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c24dab4283a142afa2fdca129b80ad2c6284e073930f964c3a1293c225ee39a" -dependencies = [ - "rustc_version", -] - [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "bitflags", - "textwrap", - "unicode-width", -] - [[package]] name = "codespan-reporting" version = "0.11.1" @@ -148,108 +99,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "criterion" -version = "0.3.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1604dafd25fba2fe2d5895a9da139f8dc9b319a5fe5354ca137cbbce4e178d10" -dependencies = [ - "atty", - "cast", - "clap", - "criterion-plot", - "csv", - "itertools", - "lazy_static", - "num-traits", - "oorandom", - "plotters", - "rayon", - "regex", - "serde", - "serde_cbor", - "serde_derive", - "serde_json", - "tinytemplate", - "walkdir", -] - -[[package]] -name = "criterion-plot" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d00996de9f2f7559f7f4dc286073197f83e92256a59ed395f9aac01fe717da57" -dependencies = [ - "cast", - "itertools", -] - -[[package]] -name = "crossbeam-channel" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-deque" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" -dependencies = [ - "cfg-if", - "crossbeam-epoch", - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-epoch" -version = "0.9.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "lazy_static", - "memoffset", - "scopeguard", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" -dependencies = [ - "cfg-if", - "lazy_static", -] - -[[package]] -name = "csv" -version = "1.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22813a6dc45b335f9bade10bf7271dc477e81113e89eb251a0bc2a8a81c536e1" -dependencies = [ - "bstr", - "csv-core", - "itoa 0.4.8", - "ryu", - "serde", -] - -[[package]] -name = "csv-core" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b2466559f260f48ad25fe6317b3c8dac77b5bdb5763ac7d9d6103530663bc90" -dependencies = [ - "memchr", -] - [[package]] name = "data-url" version = "0.1.1" @@ -427,21 +276,6 @@ dependencies = [ "wasi", ] -[[package]] -name = "half" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" - -[[package]] -name = "hermit-abi" -version = "0.1.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" -dependencies = [ - "libc", -] - [[package]] name = "iai" version = "0.1.1" @@ -481,27 +315,12 @@ version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" -[[package]] -name = "itoa" -version = 
"1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" - [[package]] name = "jpeg-decoder" version = "0.1.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "229d53d58899083193af11e15917b5640cd40b29ff475a1fe4ef725deb02d0f2" -[[package]] -name = "js-sys" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cc9ffccd38c451a86bf13657df244e9c3f37493cce8e5e21e940963777acc84" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "kurbo" version = "0.8.3" @@ -511,12 +330,6 @@ dependencies = [ "arrayvec 0.7.2", ] -[[package]] -name = "lazy_static" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" - [[package]] name = "libc" version = "0.2.112" @@ -538,12 +351,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" -[[package]] -name = "memchr" -version = "2.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" - [[package]] name = "memmap2" version = "0.5.0" @@ -553,15 +360,6 @@ dependencies = [ "libc", ] -[[package]] -name = "memoffset" -version = "0.6.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" -dependencies = [ - "autocfg", -] - [[package]] name = "miniz_oxide" version = "0.3.7" @@ -622,28 +420,12 @@ dependencies = [ "autocfg", ] -[[package]] -name = "num_cpus" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" -dependencies = [ - "hermit-abi", - "libc", -] - 
[[package]] name = "once_cell" version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" -[[package]] -name = "oorandom" -version = "11.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ab1bc2a289d34bd04a330323ac98a1b4bc82c9d9fcb1e66b63caa84da26b575" - [[package]] name = "pdf-writer" version = "0.4.1" @@ -651,7 +433,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36d760a6f2ac90811cba1006a298e8a7e5ce2c922bb5dc7f7000911a4a6b60f4" dependencies = [ "bitflags", - "itoa 0.4.8", + "itoa", "ryu", ] @@ -661,34 +443,6 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db8bcd96cb740d03149cbad5518db9fd87126a10ab519c011893b1754134c468" -[[package]] -name = "plotters" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a3fd9ec30b9749ce28cd91f255d569591cdf937fe280c312143e3c4bad6f2a" -dependencies = [ - "num-traits", - "plotters-backend", - "plotters-svg", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "plotters-backend" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d88417318da0eaf0fdcdb51a0ee6c3bed624333bff8f946733049380be67ac1c" - -[[package]] -name = "plotters-svg" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "521fa9638fa597e1dc53e9412a4f9cefb01187ee1f7413076f9e6749e2885ba9" -dependencies = [ - "plotters-backend", -] - [[package]] name = "png" version = "0.16.8" @@ -778,31 +532,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "rayon" -version = "1.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" -dependencies = [ - "autocfg", - "crossbeam-deque", - "either", - "rayon-core", -] - -[[package]] 
-name = "rayon-core" -version = "1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" -dependencies = [ - "crossbeam-channel", - "crossbeam-deque", - "crossbeam-utils", - "lazy_static", - "num_cpus", -] - [[package]] name = "rctree" version = "0.4.0" @@ -828,27 +557,6 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "regex" -version = "1.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" -dependencies = [ - "regex-syntax", -] - -[[package]] -name = "regex-automata" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" - -[[package]] -name = "regex-syntax" -version = "0.6.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" - [[package]] name = "resvg" version = "0.19.0" @@ -882,15 +590,6 @@ dependencies = [ "xmlparser", ] -[[package]] -name = "rustc_version" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" -dependencies = [ - "semver", -] - [[package]] name = "rustybuzz" version = "0.4.0" @@ -931,18 +630,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "scopeguard" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" - -[[package]] -name = "semver" -version = "1.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568a8e6258aa33c13358f81fd834adb854c6f7c9468520910a9b1e8fac068012" - [[package]] name = "serde" version = "1.0.132" @@ -952,16 +639,6 @@ dependencies = [ "serde_derive", ] -[[package]] 
-name = "serde_cbor" -version = "0.11.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" -dependencies = [ - "half", - "serde", -] - [[package]] name = "serde_derive" version = "1.0.132" @@ -973,17 +650,6 @@ dependencies = [ "syn", ] -[[package]] -name = "serde_json" -version = "1.0.74" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee2bb9cd061c5865d345bb02ca49fcef1391741b672b54a0bf7b679badec3142" -dependencies = [ - "itoa 1.0.1", - "ryu", - "serde", -] - [[package]] name = "simplecss" version = "0.2.1" @@ -1046,15 +712,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "thiserror" version = "1.0.30" @@ -1089,16 +746,6 @@ dependencies = [ "safe_arch", ] -[[package]] -name = "tinytemplate" -version = "1.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" -dependencies = [ - "serde", - "serde_json", -] - [[package]] name = "ttf-parser" version = "0.12.3" @@ -1111,7 +758,6 @@ version = "0.1.0" dependencies = [ "anyhow", "codespan-reporting", - "criterion", "dirs", "filedescriptor", "fxhash", @@ -1246,70 +892,6 @@ version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" -[[package]] -name = "wasm-bindgen" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" -dependencies = [ - "cfg-if", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = 
"0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" -dependencies = [ - "bumpalo", - "lazy_static", - "log", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" - -[[package]] -name = "web-sys" -version = "0.3.55" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38eb105f1c59d9eaa6b5cdc92b859d85b926e82cb2e0945cd0c9259faa6fe9fb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 5c4dddcb7..8251a7fa6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -61,7 +61,6 @@ walkdir = "2" # Dependencies updates: # - Bump ttf-parser when rustybuzz is updated # - Bump usvg and resvg in conjunction with svg2pdf -criterion = "0.3" [[bin]] name = "typst" @@ -76,8 +75,3 @@ harness = false name = "oneshot" path = "benches/oneshot.rs" harness = false - -[[bench]] -name = "timed" -path = "benches/timed.rs" -harness = false diff --git a/benches/timed.rs b/benches/timed.rs deleted file mode 100644 index 83820af2f..000000000 --- a/benches/timed.rs +++ /dev/null @@ -1,98 
+0,0 @@ -use std::path::Path; - -use criterion::{black_box, criterion_group, criterion_main, Criterion}; - -use typst::eval::eval; -use typst::layout::layout; -use typst::loading::MemLoader; -use typst::parse::{parse, Scanner, TokenMode, Tokens}; -use typst::source::SourceId; -use typst::Context; - -const SRC: &str = include_str!("bench.typ"); -const FONT: &[u8] = include_bytes!("../fonts/IBMPlexSans-Regular.ttf"); - -fn context() -> (Context, SourceId) { - let loader = MemLoader::new().with(Path::new("font.ttf"), FONT).wrap(); - let mut ctx = Context::new(loader); - let id = ctx.sources.provide(Path::new("src.typ"), SRC.to_string()); - (ctx, id) -} - -fn bench_decode(c: &mut Criterion) { - c.bench_function("decode", |b| { - b.iter(|| { - // We don't use chars().count() because that has a special - // superfast implementation. - let mut count = 0; - let mut chars = black_box(SRC).chars(); - while let Some(_) = chars.next() { - count += 1; - } - count - }) - }); -} - -fn bench_scan(c: &mut Criterion) { - c.bench_function("scan", |b| { - b.iter(|| { - let mut count = 0; - let mut scanner = Scanner::new(black_box(SRC)); - while let Some(_) = scanner.eat() { - count += 1; - } - count - }) - }); -} - -fn bench_tokenize(c: &mut Criterion) { - c.bench_function("tokenize", |b| { - b.iter(|| Tokens::new(black_box(SRC), black_box(TokenMode::Markup)).count()) - }); -} - -fn bench_parse(c: &mut Criterion) { - c.bench_function("parse", |b| b.iter(|| parse(SRC))); -} - -fn bench_edit(c: &mut Criterion) { - let (mut ctx, id) = context(); - c.bench_function("edit", |b| { - b.iter(|| black_box(ctx.sources.edit(id, 1168 .. 
1171, "_Uhr_"))) - }); -} - -fn bench_eval(c: &mut Criterion) { - let (mut ctx, id) = context(); - let ast = ctx.sources.get(id).ast().unwrap(); - c.bench_function("eval", |b| b.iter(|| eval(&mut ctx, id, &ast).unwrap())); -} - -fn bench_to_tree(c: &mut Criterion) { - let (mut ctx, id) = context(); - let module = ctx.evaluate(id).unwrap(); - c.bench_function("to_tree", |b| { - b.iter(|| module.template.to_pages(ctx.style())) - }); -} - -fn bench_layout(c: &mut Criterion) { - let (mut ctx, id) = context(); - let tree = ctx.execute(id).unwrap(); - c.bench_function("layout", |b| b.iter(|| layout(&mut ctx, &tree))); -} - -criterion_group!( - benches, - bench_decode, - bench_scan, - bench_tokenize, - bench_parse, - bench_edit, - bench_eval, - bench_to_tree, - bench_layout -); -criterion_main!(benches); diff --git a/src/parse/incremental.rs b/src/parse/incremental.rs index 9c912aae4..8e52c1437 100644 --- a/src/parse/incremental.rs +++ b/src/parse/incremental.rs @@ -1,480 +1,13 @@ use std::ops::Range; use std::rc::Rc; -use crate::syntax::{Green, GreenNode, NodeKind, Span}; +use crate::syntax::{Green, GreenNode, NodeKind}; use super::{ parse_atomic, parse_atomic_markup, parse_block, parse_comment, parse_markup, parse_markup_elements, parse_template, TokenMode, }; -/// Allows partial refreshs of the [`Green`] node tree. -/// -/// This struct holds a description of a change. Its methods can be used to try -/// and apply the change to a green tree. -pub struct Reparser<'a> { - /// The new source code, with the change applied. - src: &'a str, - /// Which range in the old source file was changed. - replace_range: Span, - /// How many characters replaced the text in `replacement_range`. - replace_len: usize, -} - -impl<'a> Reparser<'a> { - /// Create a new reparser. - pub fn new(src: &'a str, replace_range: Span, replace_len: usize) -> Self { - Self { src, replace_range, replace_len } - } -} - -impl Reparser<'_> { - /// Find the innermost child that is incremental safe. 
- pub fn reparse(&self, green: &mut GreenNode) -> Result, ()> { - self.reparse_step(green, 0, TokenMode::Markup, true) - } - - fn reparse_step( - &self, - green: &mut GreenNode, - mut offset: usize, - parent_mode: TokenMode, - outermost: bool, - ) -> Result, ()> { - let kind = green.kind().clone(); - let mode = kind.mode().unwrap_or(parent_mode); - - let mut loop_result = None; - let mut child_at_start = true; - let last = green.children().len() - 1; - let mut start = None; - - for (i, child) in green.children_mut().iter_mut().enumerate() { - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); - - // We look for the start in the element but we only take a position - // at the right border if this is markup or the last element. - // - // This is because in Markup mode, we want to examine all nodes - // touching a replacement but in code we want to atomically replace. - if child_span.contains(self.replace_range.start) - && (mode == TokenMode::Markup - || self.replace_range.start != child_span.end - || self.replace_range.len() == 0 - || i == last) - { - start = Some((i, offset)); - break; - } - - offset += child.len(); - child_at_start = child.kind().is_at_start(child_at_start); - } - - let (start_idx, start_offset) = start.ok_or(())?; - - for (i, child) in (green.children_mut()[start_idx ..]).iter_mut().enumerate() { - let i = i + start_idx; - let child_span = - Span::new(self.replace_range.source, offset, offset + child.len()); - - // Similarly to above, the end of the edit must be in the node but - // if it is at the edge and we are in markup node, we also want its - // neighbor! - if child_span.contains(self.replace_range.end) - && (mode != TokenMode::Markup - || self.replace_range.end != child_span.end - || i == last) - { - loop_result = Some(( - start_idx .. 
i + 1, - Span::new( - self.replace_range.source, - start_offset, - offset + child.len(), - ), - i == last && outermost, - child.kind().clone(), - )); - break; - } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { - break; - } - - offset += child.len(); - } - - let (child_idx_range, child_span, child_outermost, child_kind) = - loop_result.ok_or(())?; - - if child_idx_range.len() == 1 { - let idx = child_idx_range.start; - let child = &mut green.children_mut()[idx]; - - let old_len = child.len(); - // First, we try if the child has another, more specific applicable child. - if !child_kind.post().unsafe_interior() { - if let Ok(range) = match child { - Green::Node(n) => self.reparse_step( - Rc::make_mut(n), - start_offset, - kind.mode().unwrap_or(TokenMode::Code), - child_outermost, - ), - Green::Token(_) => Err(()), - } { - let new_len = child.len(); - green.update_child_len(new_len, old_len); - return Ok(range); - } - } - } - - debug_assert_ne!(child_idx_range.len(), 0); - - if mode == TokenMode::Code && child_idx_range.len() > 1 { - return Err(()); - } - - // We now have a child that we can replace and a function to do so. - let (func, policy) = - child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code)); - let func = func?; - - let src_span = inserted_span(child_span, self.replace_range, self.replace_len); - let recompile_range = if policy == Postcondition::AtomicPrimary { - src_span.start .. self.src.len() - } else { - src_span.to_range() - }; - - let (mut new_children, unterminated) = - func(&self.src[recompile_range], child_at_start).ok_or(())?; - - // Do not accept unclosed nodes if the old node did not use to be at the - // right edge of the tree. 
- if !child_outermost && unterminated { - return Err(()); - } - - let insertion = match check_invariants( - &new_children, - green.children(), - child_idx_range.clone(), - child_at_start, - mode, - src_span, - policy, - ) { - InvariantResult::Ok => Ok(new_children), - InvariantResult::UseFirst => Ok(vec![std::mem::take(&mut new_children[0])]), - InvariantResult::Error => Err(()), - }?; - - green.replace_child_range(child_idx_range, insertion); - - Ok(src_span.to_range()) - } -} - -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -enum InvariantResult { - Ok, - UseFirst, - Error, -} - -fn check_invariants( - use_children: &[Green], - old_children: &[Green], - child_idx_range: Range, - child_at_start: bool, - mode: TokenMode, - src_span: Span, - policy: Postcondition, -) -> InvariantResult { - let (new_children, ok) = if policy == Postcondition::AtomicPrimary { - if use_children.iter().map(Green::len).sum::() == src_span.len() { - (use_children, InvariantResult::Ok) - } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { - (&use_children[0 .. 1], InvariantResult::UseFirst) - } else { - return InvariantResult::Error; - } - } else { - (use_children, InvariantResult::Ok) - }; - - let child_mode = old_children[child_idx_range.start] - .kind() - .mode() - .unwrap_or(TokenMode::Code); - - // Check if the children / child has the right type. - let same_kind = match policy { - Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), - _ => false, - }; - - if same_kind || policy == Postcondition::AtomicPrimary { - if new_children.len() != 1 { - return InvariantResult::Error; - } - - if same_kind { - if old_children[child_idx_range.start].kind() != new_children[0].kind() { - return InvariantResult::Error; - } - } - } - - // Check if the neighbor invariants are still true. 
- if mode == TokenMode::Markup { - if child_idx_range.start > 0 { - if old_children[child_idx_range.start - 1].kind().pre() - == Precondition::RightWhitespace - && !new_children[0].kind().is_whitespace() - { - return InvariantResult::Error; - } - } - - if new_children.last().map(|x| x.kind().pre()) - == Some(Precondition::RightWhitespace) - && old_children.len() > child_idx_range.end - { - if !old_children[child_idx_range.end].kind().is_whitespace() { - return InvariantResult::Error; - } - } - - let mut new_at_start = child_at_start; - for child in new_children { - new_at_start = child.kind().is_at_start(new_at_start); - } - - for child in &old_children[child_idx_range.end ..] { - if child.kind().is_trivia() { - new_at_start = child.kind().is_at_start(new_at_start); - continue; - } - - match child.kind().pre() { - Precondition::AtStart if !new_at_start => { - return InvariantResult::Error; - } - Precondition::NotAtStart if new_at_start => { - return InvariantResult::Error; - } - _ => {} - } - break; - } - } - - ok -} - -/// Create a new span by specifying a span in which a modification happened -/// and how many characters are now in that span. -fn inserted_span(mut source: Span, other: Span, n: usize) -> Span { - if !source.surrounds(other) { - panic!(); - } - - let len_change = n as i64 - other.len() as i64; - source.end = (source.end as i64 + len_change) as usize; - source -} - -impl NodeKind { - /// Return the correct reparsing function given the postconditions for the - /// type. 
- fn reparsing_function( - &self, - parent_mode: TokenMode, - ) -> ( - Result Option<(Vec, bool)>, ()>, - Postcondition, - ) { - let policy = self.post(); - let mode = self.mode().unwrap_or(parent_mode); - - match policy { - Postcondition::Unsafe | Postcondition::UnsafeLayer => (Err(()), policy), - Postcondition::AtomicPrimary if mode == TokenMode::Code => { - (Ok(parse_atomic), policy) - } - Postcondition::AtomicPrimary => (Ok(parse_atomic_markup), policy), - Postcondition::SameKind(x) if x == None || x == Some(mode) => { - let parser: fn(&str, bool) -> _ = match self { - NodeKind::Template => parse_template, - NodeKind::Block => parse_block, - NodeKind::LineComment | NodeKind::BlockComment => parse_comment, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - _ => { - let parser: fn(&str, bool) -> _ = match mode { - TokenMode::Markup if self == &Self::Markup => parse_markup, - TokenMode::Markup => parse_markup_elements, - _ => return (Err(()), policy), - }; - - (Ok(parser), policy) - } - } - } - - /// Whether it is safe to do incremental parsing on this node. Never allow - /// non-termination errors if this is not already the last leaf node. - pub fn post(&self) -> Postcondition { - match self { - // Replacing parenthesis changes if the expression is balanced and - // is therefore not safe. - Self::LeftBracket - | Self::RightBracket - | Self::LeftBrace - | Self::RightBrace - | Self::LeftParen - | Self::RightParen => Postcondition::Unsafe, - - // Replacing an operator can change whether the parent is an - // operation which makes it unsafe. The star can appear in markup. 
- Self::Star - | Self::Comma - | Self::Semicolon - | Self::Colon - | Self::Plus - | Self::Minus - | Self::Slash - | Self::Eq - | Self::EqEq - | Self::ExclEq - | Self::Lt - | Self::LtEq - | Self::Gt - | Self::GtEq - | Self::PlusEq - | Self::HyphEq - | Self::StarEq - | Self::SlashEq - | Self::Not - | Self::And - | Self::Or - | Self::With - | Self::Dots - | Self::Arrow => Postcondition::Unsafe, - - // These keywords are literals and can be safely be substituted with - // other expressions. - Self::None | Self::Auto => Postcondition::AtomicPrimary, - - // These keywords change what kind of expression the parent is and - // how far the expression would go. - Self::Let - | Self::Set - | Self::If - | Self::Else - | Self::For - | Self::In - | Self::While - | Self::Break - | Self::Continue - | Self::Return - | Self::Import - | Self::Include - | Self::From => Postcondition::Unsafe, - - Self::Markup => Postcondition::SameKind(None), - - Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), - - // These are all replaceable by other tokens. - Self::Parbreak - | Self::Linebreak - | Self::Text(_) - | Self::TextInLine(_) - | Self::NonBreakingSpace - | Self::EnDash - | Self::EmDash - | Self::Escape(_) - | Self::Strong - | Self::Emph - | Self::Heading - | Self::Enum - | Self::List - | Self::Raw(_) - | Self::Math(_) => Postcondition::Safe, - - // Changing the heading level, enum numbering, or list bullet - // changes the next layer. - Self::EnumNumbering(_) => Postcondition::Unsafe, - - // These are expressions that can be replaced by other expressions. 
- Self::Ident(_) - | Self::Bool(_) - | Self::Int(_) - | Self::Float(_) - | Self::Length(_, _) - | Self::Angle(_, _) - | Self::Percentage(_) - | Self::Str(_) - | Self::Fraction(_) - | Self::Array - | Self::Dict - | Self::Group => Postcondition::AtomicPrimary, - - Self::Call - | Self::Unary - | Self::Binary - | Self::CallArgs - | Self::Named - | Self::Spread => Postcondition::UnsafeLayer, - - // The closure is a bit magic with the let expression, and also it - // is not atomic. - Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, - - // These can appear as bodies and would trigger an error if they - // became something else. - Self::Template => Postcondition::SameKind(None), - Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), - - Self::ForExpr - | Self::WhileExpr - | Self::IfExpr - | Self::LetExpr - | Self::SetExpr - | Self::ImportExpr - | Self::IncludeExpr => Postcondition::AtomicPrimary, - - Self::WithExpr | Self::ForPattern | Self::ImportItems => { - Postcondition::UnsafeLayer - } - - // These can appear everywhere and must not change to other stuff - // because that could change the outer expression. - Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), - - Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, - } - } - - /// The appropriate precondition for the type. - pub fn pre(&self) -> Precondition { - match self { - Self::Heading | Self::Enum | Self::List => Precondition::AtStart, - Self::TextInLine(_) => Precondition::NotAtStart, - Self::Linebreak => Precondition::RightWhitespace, - _ => Precondition::None, - } - } -} - /// The conditions that a node has to fulfill in order to be replaced. /// /// This can dictate if a node can be replaced at all and if yes, what can take @@ -519,6 +52,438 @@ pub enum Precondition { None, } +/// Allows partial refreshs of the [`Green`] node tree. +/// +/// This struct holds a description of a change. 
Its methods can be used to try +/// and apply the change to a green tree. +pub struct Reparser<'a> { + /// The new source code, with the change applied. + src: &'a str, + /// Which range in the old source file was changed. + replace_range: Range, + /// How many characters replaced the text in `replace_range`. + replace_len: usize, +} + +impl<'a> Reparser<'a> { + /// Create a new reparser. + pub fn new(src: &'a str, replace_range: Range, replace_len: usize) -> Self { + Self { src, replace_range, replace_len } + } +} + +impl Reparser<'_> { + /// Find the innermost child that is incremental safe. + pub fn reparse(&self, green: &mut GreenNode) -> Option> { + self.reparse_step(green, 0, TokenMode::Markup, true) + } + + fn reparse_step( + &self, + green: &mut GreenNode, + mut offset: usize, + parent_mode: TokenMode, + mut outermost: bool, + ) -> Option> { + let kind = green.kind().clone(); + let mode = kind.mode().unwrap_or(parent_mode); + + let mut child_at_start = true; + let last = green.children().len().saturating_sub(1); + let mut start = None; + + for (i, child) in green.children_mut().iter_mut().enumerate() { + let child_span = offset .. offset + child.len(); + + // We look for the start in the element but we only take a position + // at the right border if this is markup or the last element. + // + // This is because in Markup mode, we want to examine all nodes + // touching a replacement but in code we want to atomically replace. + if child_span.contains(&self.replace_range.start) + || (mode == TokenMode::Markup + && self.replace_range.start == child_span.end) + { + start = Some((i, offset)); + break; + } + + offset += child.len(); + child_at_start = child.kind().is_at_start(child_at_start); + } + + let (start_idx, start_offset) = start?; + let mut end = None; + + for (i, child) in green.children_mut().iter_mut().enumerate().skip(start_idx) { + let child_span = offset .. 
offset + child.len(); + + // Similarly to above, the end of the edit must be in the node but + // if it is at the edge and we are in markup mode, we also want its + // neighbor! + if child_span.contains(&self.replace_range.end) + || self.replace_range.end == child_span.end + && (mode != TokenMode::Markup || i == last) + { + outermost &= i == last; + end = Some(i); + break; + } else if mode != TokenMode::Markup || !child.kind().post().markup_safe() { + break; + } + + offset += child.len(); + } + + let end = end?; + let child_idx_range = start_idx .. end + 1; + let child_span = start_offset .. offset + green.children()[end].len(); + let child_kind = green.children()[end].kind().clone(); + + if child_idx_range.len() == 1 { + let idx = child_idx_range.start; + let child = &mut green.children_mut()[idx]; + let prev_len = child.len(); + + // First, we try if the child has another, more specific applicable child. + if !child_kind.post().unsafe_interior() { + if let Some(range) = match child { + Green::Node(n) => self.reparse_step( + Rc::make_mut(n), + start_offset, + kind.mode().unwrap_or(TokenMode::Code), + outermost, + ), + Green::Token(_) => None, + } { + let new_len = child.len(); + green.update_child_len(new_len, prev_len); + return Some(range); + } + } + } + + debug_assert_ne!(child_idx_range.len(), 0); + + if mode == TokenMode::Code && child_idx_range.len() > 1 { + return None; + } + + // We now have a child that we can replace and a function to do so. + let func = + child_kind.reparsing_function(kind.mode().unwrap_or(TokenMode::Code))?; + let policy = child_kind.post(); + + let len_change = self.replace_len as isize - self.replace_range.len() as isize; + let mut src_span = child_span; + src_span.end = (src_span.end as isize + len_change) as usize; + + let recompile_range = if policy == Postcondition::AtomicPrimary { + src_span.start .. 
self.src.len() + } else { + src_span.clone() + }; + + let (mut new_children, terminated) = + func(&self.src[recompile_range], child_at_start)?; + + // Do not accept unclosed nodes if the old node did not use to be at the + // right edge of the tree. + if !outermost && !terminated { + return None; + } + + let insertion = match check_invariants( + &new_children, + green.children(), + child_idx_range.clone(), + child_at_start, + mode, + src_span.clone(), + policy, + ) { + InvariantResult::Ok => Some(new_children), + InvariantResult::UseFirst => Some(vec![std::mem::take(&mut new_children[0])]), + InvariantResult::Error => None, + }?; + + green.replace_child_range(child_idx_range, insertion); + + Some(src_span) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +enum InvariantResult { + Ok, + UseFirst, + Error, +} + +fn check_invariants( + use_children: &[Green], + old_children: &[Green], + child_idx_range: Range, + child_at_start: bool, + mode: TokenMode, + src_span: Range, + policy: Postcondition, +) -> InvariantResult { + let (new_children, ok) = if policy == Postcondition::AtomicPrimary { + if use_children.iter().map(Green::len).sum::() == src_span.len() { + (use_children, InvariantResult::Ok) + } else if use_children.len() == 1 && use_children[0].len() == src_span.len() { + (&use_children[0 .. 1], InvariantResult::UseFirst) + } else { + return InvariantResult::Error; + } + } else { + (use_children, InvariantResult::Ok) + }; + + let child_mode = old_children[child_idx_range.start].kind().mode().unwrap_or(mode); + + // Check if the children / child has the right type. 
+ let same_kind = match policy { + Postcondition::SameKind(x) => x.map_or(true, |x| x == child_mode), + _ => false, + }; + + if same_kind || policy == Postcondition::AtomicPrimary { + if new_children.len() != 1 { + return InvariantResult::Error; + } + + if same_kind { + if old_children[child_idx_range.start].kind() != new_children[0].kind() { + return InvariantResult::Error; + } + } + } + + // Check if the neighbor invariants are still true. + if mode == TokenMode::Markup { + if child_idx_range.start > 0 { + if old_children[child_idx_range.start - 1].kind().pre() + == Precondition::RightWhitespace + && !new_children[0].kind().is_whitespace() + { + return InvariantResult::Error; + } + } + + if new_children.last().map(|x| x.kind().pre()) + == Some(Precondition::RightWhitespace) + && old_children.len() > child_idx_range.end + { + if !old_children[child_idx_range.end].kind().is_whitespace() { + return InvariantResult::Error; + } + } + + let mut post_at_start = child_at_start; + for child in new_children { + post_at_start = child.kind().is_at_start(post_at_start); + } + + for child in &old_children[child_idx_range.end ..] { + if child.kind().is_trivia() { + post_at_start = child.kind().is_at_start(post_at_start); + continue; + } + + let pre = child.kind().pre(); + if pre == Precondition::AtStart && !post_at_start + || pre == Precondition::NotAtStart && post_at_start + { + return InvariantResult::Error; + } + break; + } + } + + ok +} + +impl NodeKind { + /// Return the correct reparsing function given the postconditions for the + /// type. 
+ fn reparsing_function( + &self, + parent_mode: TokenMode, + ) -> Option Option<(Vec, bool)>> { + let policy = self.post(); + let mode = self.mode().unwrap_or(parent_mode); + + match policy { + Postcondition::Unsafe | Postcondition::UnsafeLayer => None, + Postcondition::AtomicPrimary if mode == TokenMode::Code => Some(parse_atomic), + Postcondition::AtomicPrimary => Some(parse_atomic_markup), + Postcondition::SameKind(x) if x == None || x == Some(mode) => match self { + NodeKind::Template => Some(parse_template), + NodeKind::Block => Some(parse_block), + NodeKind::LineComment | NodeKind::BlockComment => Some(parse_comment), + _ => None, + }, + _ => match mode { + TokenMode::Markup if self == &Self::Markup => Some(parse_markup), + TokenMode::Markup => Some(parse_markup_elements), + _ => return None, + }, + } + } + + /// Whether it is safe to do incremental parsing on this node. Never allow + /// non-termination errors if this is not already the last leaf node. + pub fn post(&self) -> Postcondition { + match self { + // Replacing parentheses changes whether the expression is balanced and + // is therefore not safe. + Self::LeftBracket + | Self::RightBracket + | Self::LeftBrace + | Self::RightBrace + | Self::LeftParen + | Self::RightParen => Postcondition::Unsafe, + + // Replacing an operator can change whether the parent is an + // operation which makes it unsafe. The star can appear in markup. + Self::Star + | Self::Comma + | Self::Semicolon + | Self::Colon + | Self::Plus + | Self::Minus + | Self::Slash + | Self::Eq + | Self::EqEq + | Self::ExclEq + | Self::Lt + | Self::LtEq + | Self::Gt + | Self::GtEq + | Self::PlusEq + | Self::HyphEq + | Self::StarEq + | Self::SlashEq + | Self::Not + | Self::And + | Self::Or + | Self::With + | Self::Dots + | Self::Arrow => Postcondition::Unsafe, + + // These keywords change what kind of expression the parent is and + // how far the expression would go. 
+ Self::Let + | Self::Set + | Self::If + | Self::Else + | Self::For + | Self::In + | Self::While + | Self::Break + | Self::Continue + | Self::Return + | Self::Import + | Self::Include + | Self::From => Postcondition::Unsafe, + + // Changing the heading level, enum numbering, or list bullet + // changes the next layer. + Self::EnumNumbering(_) => Postcondition::Unsafe, + + Self::Error(_, _) | Self::Unknown(_) => Postcondition::Unsafe, + + // These are complex expressions which may screw with their + // environments. + Self::Call + | Self::Unary + | Self::Binary + | Self::CallArgs + | Self::Named + | Self::Spread => Postcondition::UnsafeLayer, + + // The closure is a bit magic with the let expression, and also it + // is not atomic. + Self::Closure | Self::ClosureParams => Postcondition::UnsafeLayer, + + // Missing these creates errors for the parents. + Self::WithExpr | Self::ForPattern | Self::ImportItems => { + Postcondition::UnsafeLayer + } + + // Only markup is expected at the points where it does occur. + Self::Markup => Postcondition::SameKind(None), + + // These can appear everywhere and must not change to other stuff + // because that could change the outer expression. + Self::LineComment | Self::BlockComment => Postcondition::SameKind(None), + + // These can appear as bodies and would trigger an error if they + // became something else. + Self::Template => Postcondition::SameKind(None), + Self::Block => Postcondition::SameKind(Some(TokenMode::Code)), + + // Whitespace in code mode has to remain whitespace or else the type + // of things would change. + Self::Space(_) => Postcondition::SameKind(Some(TokenMode::Code)), + + // These are expressions that can be replaced by other expressions. 
+ Self::Ident(_) + | Self::Bool(_) + | Self::Int(_) + | Self::Float(_) + | Self::Length(_, _) + | Self::Angle(_, _) + | Self::Percentage(_) + | Self::Str(_) + | Self::Fraction(_) + | Self::Array + | Self::Dict + | Self::Group + | Self::None + | Self::Auto => Postcondition::AtomicPrimary, + + // More complex, but still an expression. + Self::ForExpr + | Self::WhileExpr + | Self::IfExpr + | Self::LetExpr + | Self::SetExpr + | Self::ImportExpr + | Self::IncludeExpr => Postcondition::AtomicPrimary, + + // These are all replaceable by other tokens. + Self::Parbreak + | Self::Linebreak + | Self::Text(_) + | Self::TextInLine(_) + | Self::NonBreakingSpace + | Self::EnDash + | Self::EmDash + | Self::Escape(_) + | Self::Strong + | Self::Emph + | Self::Heading + | Self::Enum + | Self::List + | Self::Raw(_) + | Self::Math(_) => Postcondition::Safe, + } + } + + /// The appropriate precondition for the type. + pub fn pre(&self) -> Precondition { + match self { + Self::Heading | Self::Enum | Self::List => Precondition::AtStart, + Self::TextInLine(_) => Precondition::NotAtStart, + Self::Linebreak => Precondition::RightWhitespace, + _ => Precondition::None, + } + } +} + impl Postcondition { pub fn unsafe_interior(&self) -> bool { match self { @@ -544,6 +509,7 @@ mod tests { use super::*; #[test] + #[rustfmt::skip] fn test_incremental_parse() { #[track_caller] fn test(prev: &str, range: Range, with: &str, incr: Range) { @@ -551,12 +517,14 @@ mod tests { let range = source.edit(range, with); assert_eq!(range, incr); - let incr_tree = source.root(); + let incr_tree = source.root().clone(); assert_eq!(parse(source.src()), incr_tree); } // Test simple replacements. - test("hello world", 6 .. 11, "wankers", 5 .. 13); + test("hello world", 6 .. 11, "walkers", 5 .. 13); + test("some content", 0..12, "", 0..0); + test("", 0..0, "do it", 0..5); test("a d e", 1 .. 3, " b c d", 0 .. 8); test("a #f() e", 1 .. 6, " b c d", 0 .. 8); test("{(0, 1, 2)}", 5 .. 6, "11pt", 5 .. 
9); @@ -564,53 +532,18 @@ mod tests { test("your thing", 5 .. 5, "a", 4 .. 11); test("a your thing a", 6 .. 7, "a", 2 .. 12); test("{call(); abc}", 7 .. 7, "[]", 0 .. 15); - test("#call() abc", 7 .. 7, "[]", 0 .. 13); - // test( - // "hi\n- item\n- item 2\n - item 3", - // 10 .. 10, - // " ", - // 9 .. 33, - // ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 16 .. 20, - "none", - 16 .. 20, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 33 .. 42, - "[_gronk_]", - 33 .. 42, - ); - test( - "#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", - 34 .. 41, - "_bar_", - 34 .. 39, - ); + test("#call() abc", 7 .. 7, "[]", 0 .. 10); + // test("hi\n- item\n- item 2\n - item 3", 10 .. 10, " ", 9 .. 33); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 16 .. 20, "none", 16 .. 20); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 33 .. 42, "[_gronk_]", 33 .. 42); + test("#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])", 34 .. 41, "_bar_", 34 .. 39); test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", 1 .. 9); - test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 13 .. 15); + test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", 10 .. 32); test("hello {x}", 6 .. 9, "#f()", 5 .. 10); - test( - "this is -- in my opinion -- spectacular", - 8 .. 10, - "---", - 7 .. 12, - ); - test( - "understanding `code` is complicated", - 15 .. 15, - "C ", - 14 .. 22, - ); + test("this is -- in my opinion -- spectacular", 8 .. 10, "---", 7 .. 12); + test("understanding `code` is complicated", 15 .. 15, "C ", 14 .. 22); test("{ let x = g() }", 10 .. 12, "f(54", 0 .. 17); - test( - "a #let rect with (fill: eastern)\nb", - 16 .. 
31, - " (stroke: conifer", - 2 .. 34, - ); + test("a #let rect with (fill: eastern)\nb", 16 .. 31, " (stroke: conifer", 2 .. 34); // Test the whitespace invariants. test("hello \\ world", 7 .. 8, "a ", 6 .. 14); @@ -642,18 +575,8 @@ mod tests { test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", 0 .. 24); test("a b c", 1 .. 1, " /* letters */", 0 .. 16); test("a b c", 1 .. 1, " /* letters", 0 .. 16); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters */", - 1 .. 35, - ); - test( - "{if i==1 {a} else [b]; b()}", - 12 .. 12, - " /* letters", - 0 .. 38, - ); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters */", 1 .. 35); + test("{if i==1 {a} else [b]; b()}", 12 .. 12, " /* letters", 0 .. 38); test(r#"a ```typst hello``` b"#, 16 .. 17, "", 0 .. 20); test(r#"a ```typst hello```"#, 16 .. 17, "", 2 .. 18); diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 2c421374b..2c5afb6b3 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -64,7 +64,7 @@ pub fn parse_markup_elements( /// Parse a template literal. Returns `Some` if all of the input was consumed. pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBracket)) { + if !p.at(&NodeKind::LeftBracket) { return None; } @@ -75,7 +75,7 @@ pub fn parse_template(source: &str, _: bool) -> Option<(Vec, bool)> { /// Parse a code block. Returns `Some` if all of the input was consumed. pub fn parse_block(source: &str, _: bool) -> Option<(Vec, bool)> { let mut p = Parser::new(source, TokenMode::Code); - if !matches!(p.peek(), Some(NodeKind::LeftBrace)) { + if !p.at(&NodeKind::LeftBrace) { return None; } @@ -252,14 +252,14 @@ fn expr_prec(p: &mut Parser, atomic: bool, min_prec: usize) -> ParseResult { let marker = p.marker(); // Start the unary expression. 
- match (!atomic).then(|| p.peek().and_then(UnOp::from_token)).flatten() { - Some(op) => { + match p.peek().and_then(UnOp::from_token) { + Some(op) if !atomic => { p.eat(); let prec = op.precedence(); expr_prec(p, atomic, prec)?; marker.end(p, NodeKind::Unary); } - None => primary(p, atomic)?, + _ => primary(p, atomic)?, }; loop { diff --git a/src/parse/parser.rs b/src/parse/parser.rs index 06cb15785..ade9b5df5 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -22,7 +22,7 @@ pub struct Parser<'s> { /// The children of the currently built node. children: Vec, /// Whether the last group was terminated. - last_group_terminated: bool, + last_terminated: bool, } impl<'s> Parser<'s> { @@ -38,7 +38,7 @@ impl<'s> Parser<'s> { current_start: 0, groups: vec![], children: vec![], - last_group_terminated: true, + last_terminated: true, } } @@ -50,7 +50,7 @@ impl<'s> Parser<'s> { /// End the parsing process and return multiple children. pub fn eject(self) -> Option<(Vec, bool)> { if self.eof() && self.group_success() { - Some((self.children, self.tokens.was_unterminated())) + Some((self.children, self.tokens.was_terminated())) } else { None } @@ -99,7 +99,7 @@ impl<'s> Parser<'s> { /// remains stuff in the string. pub fn eject_partial(self) -> Option<(Vec, bool)> { self.group_success() - .then(|| (self.children, self.tokens.was_unterminated())) + .then(|| (self.children, self.tokens.was_terminated())) } /// Whether the end of the source string or group is reached. 
@@ -244,7 +244,7 @@ impl<'s> Parser<'s> { let group = self.groups.pop().expect("no started group"); self.tokens.set_mode(group.prev_mode); self.repeek(); - self.last_group_terminated = true; + self.last_terminated = true; let mut rescan = self.tokens.mode() != group_mode; @@ -263,7 +263,7 @@ impl<'s> Parser<'s> { rescan = false; } else if required { self.push_error(format_eco!("expected {}", end)); - self.last_group_terminated = false; + self.last_terminated = false; } } @@ -283,7 +283,7 @@ impl<'s> Parser<'s> { /// Check if the group processing was successfully terminated. pub fn group_success(&self) -> bool { - self.last_group_terminated && self.groups.is_empty() + self.last_terminated && self.groups.is_empty() } /// Low-level bump that consumes exactly one token without special trivia diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index 7be31fe18..836e8cf17 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -13,7 +13,7 @@ use crate::util::EcoString; pub struct Tokens<'s> { s: Scanner<'s>, mode: TokenMode, - has_unterminated: bool, + was_terminated: bool, } /// What kind of tokens to emit. @@ -32,7 +32,7 @@ impl<'s> Tokens<'s> { Self { s: Scanner::new(src), mode, - has_unterminated: false, + was_terminated: true, } } @@ -69,10 +69,10 @@ impl<'s> Tokens<'s> { self.s } - /// Whether the last token was unterminated. + /// Whether the last token was terminated. 
#[inline] - pub fn was_unterminated(&self) -> bool { - self.has_unterminated + pub fn was_terminated(&self) -> bool { + self.was_terminated } } @@ -259,7 +259,7 @@ impl<'s> Tokens<'s> { ) } } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, "expected closing brace".into(), @@ -358,7 +358,7 @@ impl<'s> Tokens<'s> { let remaining = backticks - found; let noun = if remaining == 1 { "backtick" } else { "backticks" }; - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, if found == 0 { @@ -406,7 +406,7 @@ impl<'s> Tokens<'s> { display, })) } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error( ErrorPos::End, if !display || (!escaped && dollar) { @@ -495,7 +495,7 @@ impl<'s> Tokens<'s> { if self.s.eat_if('"') { NodeKind::Str(string) } else { - self.has_unterminated = true; + self.was_terminated = false; NodeKind::Error(ErrorPos::End, "expected quote".into()) } } @@ -503,7 +503,7 @@ impl<'s> Tokens<'s> { fn line_comment(&mut self) -> NodeKind { self.s.eat_until(is_newline); if self.s.peek().is_none() { - self.has_unterminated = true; + self.was_terminated = false; } NodeKind::LineComment } @@ -511,7 +511,7 @@ impl<'s> Tokens<'s> { fn block_comment(&mut self) -> NodeKind { let mut state = '_'; let mut depth = 1; - let mut terminated = false; + self.was_terminated = false; // Find the first `*/` that does not correspond to a nested `/*`. 
while let Some(c) = self.s.eat() { @@ -519,7 +519,7 @@ impl<'s> Tokens<'s> { ('*', '/') => { depth -= 1; if depth == 0 { - terminated = true; + self.was_terminated = true; break; } '_' @@ -532,10 +532,6 @@ impl<'s> Tokens<'s> { } } - if !terminated { - self.has_unterminated = true; - } - NodeKind::BlockComment } diff --git a/src/source.rs b/src/source.rs index 421412ee8..6cca9f751 100644 --- a/src/source.rs +++ b/src/source.rs @@ -12,7 +12,7 @@ use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Reparser, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{self, Category, GreenNode, RedNode, Span}; +use crate::syntax::{self, Category, GreenNode, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -265,7 +265,8 @@ impl SourceFile { /// Edit the source file by replacing the given range. /// - /// This panics if the `replace` range is out of bounds. + /// Returns the range of the section in the new source that was ultimately + /// reparsed. The method panics if the `replace` range is out of bounds. pub fn edit(&mut self, replace: Range, with: &str) -> Range { let start = replace.start; self.src.replace_range(replace.clone(), with); @@ -284,9 +285,8 @@ impl SourceFile { .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); // Update the root node. - let span = Span::new(self.id, replace.start, replace.end); - let reparser = Reparser::new(&self.src, span, with.len()); - if let Ok(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { + let reparser = Reparser::new(&self.src, replace, with.len()); + if let Some(range) = reparser.reparse(Rc::make_mut(&mut self.root)) { range } else { self.root = parse(&self.src); @@ -302,12 +302,6 @@ impl SourceFile { let red = RedNode::from_root(self.root.clone(), self.id); syntax::highlight(red.as_ref(), range, &mut f) } - - /// Obtain a reference to the source's root green node. 
- #[cfg(test)] - pub(crate) fn root(&self) -> Rc { - self.root.clone() - } } /// The indices at which lines start (right behind newlines). diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 9ab530d81..b72e58431 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -48,15 +48,6 @@ impl Green { self.data().len() } - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - let data = match self { - Self::Node(node) => &mut Rc::make_mut(node).data, - Self::Token(data) => data, - }; - data.set_len(len); - } - /// Whether the node or its children contain an error. pub fn erroneous(&self) -> bool { match self { @@ -139,11 +130,6 @@ impl GreenNode { &self.children } - /// The node's children, mutably. - pub fn children_mut(&mut self) -> &mut [Green] { - &mut self.children - } - /// The node's metadata. pub fn data(&self) -> &GreenData { &self.data @@ -159,10 +145,15 @@ impl GreenNode { self.data().len() } + /// The node's children, mutably. + pub(crate) fn children_mut(&mut self) -> &mut [Green] { + &mut self.children + } + /// Replaces a range of children with some replacement. /// /// This method updates the `erroneous` and `data.len` fields. - pub fn replace_child_range( + pub(crate) fn replace_child_range( &mut self, child_idx_range: Range, replacement: Vec, @@ -187,12 +178,12 @@ impl GreenNode { self.erroneous = self.erroneous || replacement.iter().any(Green::erroneous); self.children.splice(child_idx_range, replacement); - self.data.set_len(self.data.len + new_len - old_len); + self.data.len = self.data.len + new_len - old_len; } /// Update the length of this node given the old and new length of a /// replaced child. 
- pub fn update_child_len(&mut self, new_len: usize, old_len: usize) { + pub(crate) fn update_child_len(&mut self, new_len: usize, old_len: usize) { self.data.len = self.data.len() + new_len - old_len; self.erroneous = self.children.iter().any(|x| x.erroneous()); } @@ -246,11 +237,6 @@ impl GreenData { pub fn len(&self) -> usize { self.len } - - /// Set the length of the node. - pub fn set_len(&mut self, len: usize) { - self.len = len; - } } impl From for Green { @@ -261,7 +247,7 @@ impl From for Green { impl Debug for GreenData { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{:?}: {}", self.kind, self.len) + write!(f, "{:?}: {}", &self.kind, self.len) } } @@ -375,11 +361,6 @@ impl<'a> RedRef<'a> { Span::new(self.id, self.offset, self.offset + self.green.len()) } - /// Whether the node or its children contain an error. - pub fn erroneous(self) -> bool { - self.green.erroneous() - } - /// The error messages for this node and its descendants. pub fn errors(self) -> Vec { if !self.green.erroneous() { @@ -731,19 +712,12 @@ impl NodeKind { /// Whether this is whitespace. pub fn is_whitespace(&self) -> bool { - match self { - Self::Space(_) | Self::Parbreak => true, - _ => false, - } + matches!(self, Self::Space(_) | Self::Parbreak) } /// Whether this is trivia. pub fn is_trivia(&self) -> bool { - match self { - _ if self.is_whitespace() => true, - Self::LineComment | Self::BlockComment => true, - _ => false, - } + self.is_whitespace() || matches!(self, Self::LineComment | Self::BlockComment) } /// Whether this is some kind of error. @@ -765,7 +739,6 @@ impl NodeKind { pub fn mode(&self) -> Option { match self { Self::Markup - | Self::Space(_) | Self::Linebreak | Self::Parbreak | Self::Text(_) @@ -783,6 +756,7 @@ impl NodeKind { | Self::Raw(_) | Self::Math(_) => Some(TokenMode::Markup), Self::Template + | Self::Space(_) | Self::Block | Self::Ident(_) | Self::LetExpr