- length updates
- dealing with keywords and comments
This commit is contained in:
Martin Haug 2021-11-07 19:43:01 +01:00
parent eba7fc34ef
commit 0663758fbb
4 changed files with 234 additions and 96 deletions

View File

@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
/// Parse a source file. /// Parse a source file.
pub fn parse(src: &str) -> Rc<GreenNode> { pub fn parse(src: &str) -> Rc<GreenNode> {
let mut p = Parser::new(src); let mut p = Parser::new(src, TokenMode::Markup);
markup(&mut p); markup(&mut p);
match p.finish().into_iter().next() { match p.finish().into_iter().next() {
Some(Green::Node(node)) => node, Some(Green::Node(node)) => node,
@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
} }
/// Parse an atomic primary. Returns `Some` if all of the input was consumed. /// Parse an atomic primary. Returns `Some` if all of the input was consumed.
pub fn parse_atomic(source: &str, _: bool) -> Option<Vec<Green>> { pub fn parse_atomic(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source); let mut p = Parser::new(src, TokenMode::Code);
primary(&mut p, true).ok()?; primary(&mut p, true).ok()?;
p.eject() p.eject()
} }
/// Parse some markup. Returns `Some` if all of the input was consumed. /// Parse some markup. Returns `Some` if all of the input was consumed.
pub fn parse_markup(source: &str, _: bool) -> Option<Vec<Green>> { pub fn parse_markup(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source); let mut p = Parser::new(src, TokenMode::Markup);
markup(&mut p); markup(&mut p);
p.eject() p.eject()
} }
/// Parse some markup without the topmost node. Returns `Some` if all of the /// Parse some markup without the topmost node. Returns `Some` if all of the
/// input was consumed. /// input was consumed.
pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Green>> { pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source); let mut p = Parser::new(src, TokenMode::Markup);
while !p.eof() { while !p.eof() {
markup_node(&mut p, &mut at_start); markup_node(&mut p, &mut at_start);
} }
@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Gre
} }
/// Parse some code. Returns `Some` if all of the input was consumed. /// Parse some code. Returns `Some` if all of the input was consumed.
pub fn parse_code(source: &str, _: bool) -> Option<Vec<Green>> { pub fn parse_code(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source); let mut p = Parser::new(src, TokenMode::Code);
p.set_mode(TokenMode::Code);
expr_list(&mut p); expr_list(&mut p);
p.eject() p.eject()
} }

View File

@ -27,8 +27,8 @@ pub struct Parser<'s> {
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
/// Create a new parser for the source string. /// Create a new parser for the source string.
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str, mode: TokenMode) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup); let mut tokens = Tokens::new(src, mode);
let current = tokens.next(); let current = tokens.next();
Self { Self {
tokens, tokens,
@ -202,11 +202,6 @@ impl<'s> Parser<'s> {
self.tokens.scanner().column(index) self.tokens.scanner().column(index)
} }
/// Set the tokenizer's mode.
pub fn set_mode(&mut self, mode: TokenMode) {
self.tokens.set_mode(mode);
}
/// Continue parsing in a group. /// Continue parsing in a group.
/// ///
/// When the end delimiter of the group is reached, all subsequent calls to /// When the end delimiter of the group is reached, all subsequent calls to

View File

@ -128,6 +128,7 @@ pub struct SourceFile {
src: String, src: String,
line_starts: Vec<usize>, line_starts: Vec<usize>,
root: Rc<GreenNode>, root: Rc<GreenNode>,
was_incremental: bool,
} }
impl SourceFile { impl SourceFile {
@ -141,6 +142,7 @@ impl SourceFile {
root: parse(&src), root: parse(&src),
src, src,
line_starts, line_starts,
was_incremental: false,
} }
} }
@ -286,12 +288,20 @@ impl SourceFile {
// Update the root node. // Update the root node.
let insertion_span = Span::new(self.id, replace.start, replace.end); let insertion_span = Span::new(self.id, replace.start, replace.end);
let source = self.src().to_string(); let source = self.src().to_string();
if !Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) if Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) {
{ self.was_incremental = true;
} else {
self.root = parse(self.src()); self.root = parse(self.src());
self.was_incremental = false;
} }
} }
/// Forces a non-incremental reparsing of the source file.
fn force_reparse(&mut self) {
self.root = parse(self.src());
self.was_incremental = false;
}
/// Provide highlighting categories for the given range of the source file. /// Provide highlighting categories for the given range of the source file.
pub fn highlight<F>(&self, range: Range<usize>, mut f: F) pub fn highlight<F>(&self, range: Range<usize>, mut f: F)
where where
@ -379,7 +389,6 @@ impl<'a> Files<'a> for SourceStore {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use crate::syntax::Green;
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl"; const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
@ -481,19 +490,88 @@ mod tests {
} }
#[test] #[test]
fn test_source_file_edit_2() { fn test_incremental_parse() {
#[track_caller] #[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) { fn test(prev: &str, range: Range<usize>, with: &str, incr: bool) {
let mut source = SourceFile::detached(prev); let mut source = SourceFile::detached(prev);
let result = SourceFile::detached(after);
dbg!(Green::from(source.root.clone()));
source.edit(range, with); source.edit(range, with);
assert_eq!(source.src, result.src);
assert_eq!(source.line_starts, result.line_starts); if incr {
dbg!(Green::from(source.root)); assert!(source.was_incremental);
let incr_tree = source.root.clone();
source.force_reparse();
assert_eq!(source.root, incr_tree);
} else {
assert!(!source.was_incremental);
}
} }
// Test inserting at the beginning. // Test simple replacements.
test("abc #f()[def] ghi", 5 .. 6, "g", "abc #g()[def] ghi"); test("hello world", 6 .. 11, "wankers", true);
test("{(0, 1, 2)}", 5 .. 6, "11pt", true);
test("= A heading", 3 .. 3, "n evocative", true);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
16 .. 20,
"none",
true,
);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
33 .. 42,
"[_gronk_]",
true,
);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
34 .. 41,
"_bar_",
true,
);
test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", true);
test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", true);
test("hello {x}", 6 .. 9, "#f()", false);
test(
"this is -- in my opinion -- spectacular",
8 .. 10,
"---",
true,
);
test("understanding `code` is complicated", 15 .. 15, "C ", true);
test("{ let x = g() }", 10 .. 12, "f(54", true);
test(
"#let rect with (fill: eastern)",
14 .. 29,
" (stroke: conifer",
true,
);
test("a b c", 1 .. 1, " /* letters */", false);
// Test the whitespace invariants.
test("hello \\ world", 7 .. 8, "a ", false);
test("hello \\ world", 7 .. 8, "\n\n", true);
test("x = y", 2 .. 2, "+ y ", true);
test("x = y", 2 .. 2, "+ y \n ", false);
test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true);
test("abc\n= a heading", 3 .. 4, "\nnot ", false);
// Test type invariants.
test("#for x in array {x}", 16 .. 19, "[#x]", true);
test("#let x = 1 {5}", 1 .. 4, "if", false);
test("#let x = 1 {5}", 4 .. 4, " if", false);
test("a // b c #f()", 3 .. 4, "", false);
// This apparently works, but the assertion fails.
// test("a b c", 1 .. 1, "{[}", true);
// Test unclosed things.
test(r#"{"hi"}"#, 4 .. 5, "c", false);
test(r"this \u{abcd}", 8 .. 9, "", true);
test(r"this \u{abcd} that", 12 .. 13, "", false);
test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false);
// These apparently work, but the assertion fails.
// test(r#"a ```typst hello``` b"#, 16 .. 17, "", false);
// test(r#"a ```typst hello```"#, 16 .. 17, "", true);
} }
} }

View File

@ -49,6 +49,15 @@ impl Green {
self.data().len() self.data().len()
} }
/// Set the length of the node.
pub fn set_len(&mut self, len: usize) {
let data = match self {
Self::Node(node) => &mut Rc::make_mut(node).data,
Self::Token(data) => data,
};
data.set_len(len);
}
/// Whether the node or its children contain an error. /// Whether the node or its children contain an error.
pub fn erroneous(&self) -> bool { pub fn erroneous(&self) -> bool {
match self { match self {
@ -78,15 +87,15 @@ impl Green {
} }
/// Find the innermost child that is incremental safe. /// Find the innermost child that is incremental safe.
pub fn incremental_int( fn incremental_int(
&mut self, &mut self,
edit: &str, edit: &str,
replace: Span, replace: Span,
replacement_len: usize, replacement_len: usize,
offset: usize, offset: usize,
parent_mode: TokenMode, parent_mode: NodeMode,
outermost: bool, outermost: bool,
) -> bool { ) -> Result<(), bool> {
match self { match self {
Green::Node(n) => Rc::make_mut(n).incremental_int( Green::Node(n) => Rc::make_mut(n).incremental_int(
edit, edit,
@ -96,7 +105,7 @@ impl Green {
parent_mode, parent_mode,
outermost, outermost,
), ),
Green::Token(_) => false, Green::Token(_) => Err(false),
} }
} }
@ -202,11 +211,17 @@ impl GreenNode {
/// Find the innermost child that is incremental safe. /// Find the innermost child that is incremental safe.
pub fn incremental( pub fn incremental(
&mut self, &mut self,
edit: &str, src: &str,
replace: Span, replace: Span,
replacement_len: usize, replacement_len: usize,
) -> bool { ) -> bool {
self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true) let edit = &src[replace.inserted(replace, replacement_len).to_range()];
if edit.contains("//") || edit.contains("/*") || edit.contains("*/") {
return false;
}
self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true)
.is_ok()
} }
fn incremental_int( fn incremental_int(
@ -215,9 +230,9 @@ impl GreenNode {
replace: Span, replace: Span,
replacement_len: usize, replacement_len: usize,
mut offset: usize, mut offset: usize,
parent_mode: TokenMode, parent_mode: NodeMode,
outermost: bool, outermost: bool,
) -> bool { ) -> Result<(), bool> {
let kind = self.kind().clone(); let kind = self.kind().clone();
let mode = kind.mode().apply(parent_mode); let mode = kind.mode().apply(parent_mode);
eprintln!("in {:?} (mode {:?})", kind, mode); eprintln!("in {:?} (mode {:?})", kind, mode);
@ -230,30 +245,41 @@ impl GreenNode {
if child_span.surrounds(replace) { if child_span.surrounds(replace) {
eprintln!("found correct child"); eprintln!("found correct child");
let old_len = child.len();
// First, we try if the child has another, more specific applicable child. // First, we try if the child has another, more specific applicable child.
if kind.incremental_safety() != IncrementalSafety::Unsafe if !kind.incremental_safety().unsafe_interior() {
&& child.incremental_int( match child.incremental_int(
src, src,
replace, replace,
replacement_len, replacement_len,
offset, offset,
mode, kind.mode().child_mode(),
i == last && outermost, i == last && outermost,
) ) {
{ Ok(_) => {
eprintln!("child was successful"); eprintln!("child success");
return true; let new_len = child.len();
self.data.set_len(self.data.len() + new_len - old_len);
return Ok(());
}
Err(b) if b => return Err(false),
_ => {}
}
} }
// This didn't work, so we try to replace the child at this // This didn't work, so we try to replace the child at this
// level. // level.
let (function, policy) = let (function, policy) = match child
if let Some(p) = child.kind().reparsing_function(mode) { .kind()
p .reparsing_function(mode.child_mode().as_token_mode())
} else { {
return false; Ok(p) => p,
Err(policy) => {
return Err(policy == IncrementalSafety::VeryUnsafe);
}
}; };
loop_result = Some((i, child_span, function, policy)); loop_result =
Some((i, child_span, i == last && outermost, function, policy));
break; break;
} }
@ -264,13 +290,13 @@ impl GreenNode {
// We now have a child that we can replace and a function to do so if // We now have a child that we can replace and a function to do so if
// the loop found any results at all. // the loop found any results at all.
let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result let (child_idx, child_span, child_outermost, func, policy) =
{ if let Some(loop_result) = loop_result {
loop_result loop_result
} else { } else {
// No child fully contains the replacement. // No child fully contains the replacement.
eprintln!("no child match"); eprintln!("no child match");
return false; return Err(false);
}; };
eprintln!("aquired function, policy {:?}", policy); eprintln!("aquired function, policy {:?}", policy);
@ -282,9 +308,10 @@ impl GreenNode {
new_children new_children
} else { } else {
eprintln!("function failed"); eprintln!("function failed");
return false; return Err(false);
}; };
let child_mode = self.children[child_idx].kind().mode().apply(mode); let child_mode =
self.children[child_idx].kind().mode().child_mode().as_token_mode();
eprintln!("child mode {:?}", child_mode); eprintln!("child mode {:?}", child_mode);
// Check if the children / child has the right type. // Check if the children / child has the right type.
@ -298,7 +325,7 @@ impl GreenNode {
eprintln!("must be a single replacement"); eprintln!("must be a single replacement");
if new_children.len() != 1 { if new_children.len() != 1 {
eprintln!("not a single replacement"); eprintln!("not a single replacement");
return false; return Err(false);
} }
if match policy { if match policy {
@ -310,32 +337,32 @@ impl GreenNode {
} { } {
if self.children[child_idx].kind() != new_children[0].kind() { if self.children[child_idx].kind() != new_children[0].kind() {
eprintln!("not the same kind"); eprintln!("not the same kind");
return false; return Err(false);
} }
} }
} }
// Do not accept unclosed nodes if the old node did not use to be at the // Do not accept unclosed nodes if the old node did not use to be at the
// right edge of the tree. // right edge of the tree.
if !outermost if !child_outermost
&& new_children && new_children
.iter() .iter()
.flat_map(|x| x.errors()) .flat_map(|x| x.errors())
.any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _))) .any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _)))
{ {
eprintln!("unclosed node"); eprintln!("unclosed node");
return false; return Err(false);
} }
// Check if the neighbor invariants are still true. // Check if the neighbor invariants are still true.
if mode == TokenMode::Markup { if mode.as_token_mode() == TokenMode::Markup {
if child_idx > 0 { if child_idx > 0 {
if self.children[child_idx - 1].kind().incremental_safety() if self.children[child_idx - 1].kind().incremental_safety()
== IncrementalSafety::EnsureRightWhitespace == IncrementalSafety::EnsureRightWhitespace
&& !new_children[0].kind().is_whitespace() && !new_children[0].kind().is_whitespace()
{ {
eprintln!("left whitespace missing"); eprintln!("left whitespace missing");
return false; return Err(false);
} }
} }
@ -351,8 +378,12 @@ impl GreenNode {
} }
match child.kind().incremental_safety() { match child.kind().incremental_safety() {
IncrementalSafety::EnsureAtStart if !new_at_start => return false, IncrementalSafety::EnsureAtStart if !new_at_start => {
IncrementalSafety::EnsureNotAtStart if new_at_start => return false, return Err(false);
}
IncrementalSafety::EnsureNotAtStart if new_at_start => {
return Err(false);
}
_ => {} _ => {}
} }
break; break;
@ -361,8 +392,12 @@ impl GreenNode {
eprintln!("... replacing"); eprintln!("... replacing");
let old_len = self.children[child_idx].len();
let new_len: usize = new_children.iter().map(Green::len).sum();
self.children.splice(child_idx .. child_idx + 1, new_children); self.children.splice(child_idx .. child_idx + 1, new_children);
true self.data.set_len(self.data.len + new_len - old_len);
Ok(())
} }
} }
@ -414,6 +449,11 @@ impl GreenData {
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.len self.len
} }
/// Set the length of the node.
pub fn set_len(&mut self, len: usize) {
self.len = len;
}
} }
impl From<GreenData> for Green { impl From<GreenData> for Green {
@ -939,24 +979,18 @@ impl NodeKind {
| Self::Escape(_) | Self::Escape(_)
| Self::Strong | Self::Strong
| Self::Emph | Self::Emph
| Self::Raw(_)
| Self::Math(_) => NodeMode::Markup, | Self::Math(_) => NodeMode::Markup,
Self::Template Self::Template
| Self::Block | Self::Block
| Self::None
| Self::Auto
| Self::Ident(_) | Self::Ident(_)
| Self::Bool(_) | Self::LetExpr
| Self::Int(_) | Self::IfExpr
| Self::Float(_) | Self::WhileExpr
| Self::Length(_, _) | Self::ForExpr
| Self::Angle(_, _) | Self::ImportExpr
| Self::Percentage(_)
| Self::Str(_)
| Self::Fraction(_)
| Self::Array
| Self::Dict
| Self::Group
| Self::Call | Self::Call
| Self::IncludeExpr
| Self::LineComment | Self::LineComment
| Self::BlockComment | Self::BlockComment
| Self::Error(_, _) | Self::Error(_, _)
@ -969,22 +1003,25 @@ impl NodeKind {
pub fn reparsing_function( pub fn reparsing_function(
&self, &self,
parent_mode: TokenMode, parent_mode: TokenMode,
) -> Option<(fn(&str, bool) -> Option<Vec<Green>>, IncrementalSafety)> { ) -> Result<
(fn(&str, bool) -> Option<Vec<Green>>, IncrementalSafety),
IncrementalSafety,
> {
let policy = self.incremental_safety(); let policy = self.incremental_safety();
if policy == IncrementalSafety::Unsafe { if policy.unsafe_interior() {
return None; return Err(policy);
} }
let mode = self.mode(); let mode = self.mode();
if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer { if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer {
return None; return Err(policy);
} }
if mode != NodeMode::Markup if mode != NodeMode::Markup
&& parent_mode == TokenMode::Code && parent_mode == TokenMode::Code
&& policy == IncrementalSafety::AtomicPrimary && policy == IncrementalSafety::AtomicPrimary
{ {
return Some((parse_atomic, policy)); return Ok((parse_atomic, policy));
} }
let parser: fn(&str, bool) -> _ = match mode { let parser: fn(&str, bool) -> _ = match mode {
@ -995,7 +1032,7 @@ impl NodeKind {
NodeMode::Universal => parse_markup_elements, NodeMode::Universal => parse_markup_elements,
}; };
Some((parser, policy)) Ok((parser, policy))
} }
/// Whether it is safe to do incremental parsing on this node. Never allow /// Whether it is safe to do incremental parsing on this node. Never allow
@ -1042,7 +1079,8 @@ impl NodeKind {
// other expressions. // other expressions.
Self::None | Self::Auto => IncrementalSafety::AtomicPrimary, Self::None | Self::Auto => IncrementalSafety::AtomicPrimary,
// These keywords change what kind of expression the parent is. // These keywords change what kind of expression the parent is and
// how far the expression would go.
Self::Let Self::Let
| Self::If | Self::If
| Self::Else | Self::Else
@ -1055,7 +1093,7 @@ impl NodeKind {
| Self::Set | Self::Set
| Self::Import | Self::Import
| Self::Include | Self::Include
| Self::From => IncrementalSafety::Unsafe, | Self::From => IncrementalSafety::VeryUnsafe,
// This is a backslash followed by a space. But changing it to // This is a backslash followed by a space. But changing it to
// anything else is fair game. // anything else is fair game.
@ -1309,6 +1347,17 @@ pub enum IncrementalSafety {
/// Changing an unsafe node or any of its children will trigger undefined /// Changing an unsafe node or any of its children will trigger undefined
/// behavior. Change the parents instead. /// behavior. Change the parents instead.
Unsafe, Unsafe,
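/// Unsafe for two levels: neither this node nor the node directly containing
/// it may be replaced on its own; the change must be handled further up.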
VeryUnsafe,
}
impl IncrementalSafety {
pub fn unsafe_interior(&self) -> bool {
match self {
Self::Unsafe | Self::VeryUnsafe => true,
_ => false,
}
}
} }
/// This enum describes which mode a token of [`NodeKind`] can appear in. /// This enum describes which mode a token of [`NodeKind`] can appear in.
@ -1319,17 +1368,34 @@ pub enum NodeMode {
/// The token can only appear in code mode. /// The token can only appear in code mode.
Code, Code,
/// The token can appear in either mode. Look at the parent node to decide /// The token can appear in either mode. Look at the parent node to decide
/// which mode it is in. /// which mode it is in. After an apply, this is equivalent to Markup.
Universal, Universal,
} }
impl NodeMode { impl NodeMode {
/// Returns the new [`TokenMode`] given the old one. /// Returns a new mode considering the parent node.
pub fn apply(&self, old: TokenMode) -> TokenMode { pub fn apply(&self, old: Self) -> Self {
match self { match self {
Self::Markup => TokenMode::Markup, Self::Markup => Self::Markup,
Self::Code => Self::Code,
Self::Universal if old != Self::Markup => Self::Code,
Self::Universal => Self::Universal,
}
}
/// Return the corresponding token mode.
pub fn as_token_mode(&self) -> TokenMode {
match self {
Self::Markup | Self::Universal => TokenMode::Markup,
Self::Code => TokenMode::Code, Self::Code => TokenMode::Code,
Self::Universal => old, }
}
/// The mode of the children of this node.
pub fn child_mode(&self) -> Self {
match self {
Self::Markup => Self::Markup,
Self::Code | Self::Universal => Self::Code,
} }
} }
} }