- length updates
- dealing with keywords and comments
This commit is contained in:
Martin Haug 2021-11-07 19:43:01 +01:00
parent eba7fc34ef
commit 0663758fbb
4 changed files with 234 additions and 96 deletions

View File

@ -17,7 +17,7 @@ use crate::syntax::{ErrorPos, Green, GreenNode, NodeKind};
/// Parse a source file.
pub fn parse(src: &str) -> Rc<GreenNode> {
let mut p = Parser::new(src);
let mut p = Parser::new(src, TokenMode::Markup);
markup(&mut p);
match p.finish().into_iter().next() {
Some(Green::Node(node)) => node,
@ -26,23 +26,23 @@ pub fn parse(src: &str) -> Rc<GreenNode> {
}
/// Parse an atomic primary. Returns `Some` if all of the input was consumed.
pub fn parse_atomic(source: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source);
pub fn parse_atomic(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(src, TokenMode::Code);
primary(&mut p, true).ok()?;
p.eject()
}
/// Parse some markup. Returns `Some` if all of the input was consumed.
pub fn parse_markup(source: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source);
pub fn parse_markup(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(src, TokenMode::Markup);
markup(&mut p);
p.eject()
}
/// Parse some markup without the topmost node. Returns `Some` if all of the
/// input was consumed.
pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source);
pub fn parse_markup_elements(src: &str, mut at_start: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(src, TokenMode::Markup);
while !p.eof() {
markup_node(&mut p, &mut at_start);
}
@ -50,9 +50,8 @@ pub fn parse_markup_elements(source: &str, mut at_start: bool) -> Option<Vec<Gre
}
/// Parse some code. Returns `Some` if all of the input was consumed.
pub fn parse_code(source: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(source);
p.set_mode(TokenMode::Code);
pub fn parse_code(src: &str, _: bool) -> Option<Vec<Green>> {
let mut p = Parser::new(src, TokenMode::Code);
expr_list(&mut p);
p.eject()
}

View File

@ -27,8 +27,8 @@ pub struct Parser<'s> {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(src: &'s str) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup);
pub fn new(src: &'s str, mode: TokenMode) -> Self {
let mut tokens = Tokens::new(src, mode);
let current = tokens.next();
Self {
tokens,
@ -202,11 +202,6 @@ impl<'s> Parser<'s> {
self.tokens.scanner().column(index)
}
/// Set the tokenizer's mode.
pub fn set_mode(&mut self, mode: TokenMode) {
self.tokens.set_mode(mode);
}
/// Continue parsing in a group.
///
/// When the end delimiter of the group is reached, all subsequent calls to

View File

@ -128,6 +128,7 @@ pub struct SourceFile {
src: String,
line_starts: Vec<usize>,
root: Rc<GreenNode>,
was_incremental: bool,
}
impl SourceFile {
@ -141,6 +142,7 @@ impl SourceFile {
root: parse(&src),
src,
line_starts,
was_incremental: false,
}
}
@ -286,12 +288,20 @@ impl SourceFile {
// Update the root node.
let insertion_span = Span::new(self.id, replace.start, replace.end);
let source = self.src().to_string();
if !Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len())
{
if Rc::make_mut(&mut self.root).incremental(&source, insertion_span, with.len()) {
self.was_incremental = true;
} else {
self.root = parse(self.src());
self.was_incremental = false;
}
}
/// Discards the current tree and parses the complete source text from
/// scratch, then records that the tree was not produced incrementally.
fn force_reparse(&mut self) {
    let reparsed = parse(self.src());
    self.root = reparsed;
    self.was_incremental = false;
}
/// Provide highlighting categories for the given range of the source file.
pub fn highlight<F>(&self, range: Range<usize>, mut f: F)
where
@ -379,7 +389,6 @@ impl<'a> Files<'a> for SourceStore {
#[cfg(test)]
mod tests {
use super::*;
use crate::syntax::Green;
const TEST: &str = "ä\tcde\nf💛g\r\nhi\rjkl";
@ -481,19 +490,88 @@ mod tests {
}
#[test]
fn test_source_file_edit_2() {
fn test_incremental_parse() {
#[track_caller]
fn test(prev: &str, range: Range<usize>, with: &str, after: &str) {
fn test(prev: &str, range: Range<usize>, with: &str, incr: bool) {
let mut source = SourceFile::detached(prev);
let result = SourceFile::detached(after);
dbg!(Green::from(source.root.clone()));
source.edit(range, with);
assert_eq!(source.src, result.src);
assert_eq!(source.line_starts, result.line_starts);
dbg!(Green::from(source.root));
if incr {
assert!(source.was_incremental);
let incr_tree = source.root.clone();
source.force_reparse();
assert_eq!(source.root, incr_tree);
} else {
assert!(!source.was_incremental);
}
}
// Test inserting at the beginning.
test("abc #f()[def] ghi", 5 .. 6, "g", "abc #g()[def] ghi");
// Test simple replacements.
test("hello world", 6 .. 11, "wankers", true);
test("{(0, 1, 2)}", 5 .. 6, "11pt", true);
test("= A heading", 3 .. 3, "n evocative", true);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
16 .. 20,
"none",
true,
);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
33 .. 42,
"[_gronk_]",
true,
);
test(
"#grid(columns: (auto, 1fr, 40%), [*plonk*], rect(width: 100%, height: 1pt, fill: conifer), [thing])",
34 .. 41,
"_bar_",
true,
);
test("{let i=1; for x in range(5) {i}}", 6 .. 6, " ", true);
test("{let i=1; for x in range(5) {i}}", 13 .. 14, " ", true);
test("hello {x}", 6 .. 9, "#f()", false);
test(
"this is -- in my opinion -- spectacular",
8 .. 10,
"---",
true,
);
test("understanding `code` is complicated", 15 .. 15, "C ", true);
test("{ let x = g() }", 10 .. 12, "f(54", true);
test(
"#let rect with (fill: eastern)",
14 .. 29,
" (stroke: conifer",
true,
);
test("a b c", 1 .. 1, " /* letters */", false);
// Test the whitespace invariants.
test("hello \\ world", 7 .. 8, "a ", false);
test("hello \\ world", 7 .. 8, "\n\n", true);
test("x = y", 2 .. 2, "+ y ", true);
test("x = y", 2 .. 2, "+ y \n ", false);
test("abc\n= a heading", 3 .. 4, "\nsome more test\n\n", true);
test("abc\n= a heading", 3 .. 4, "\nnot ", false);
// Test type invariants.
test("#for x in array {x}", 16 .. 19, "[#x]", true);
test("#let x = 1 {5}", 1 .. 4, "if", false);
test("#let x = 1 {5}", 4 .. 4, " if", false);
test("a // b c #f()", 3 .. 4, "", false);
// this apparently works but the assertion fails.
// test("a b c", 1 .. 1, "{[}", true);
// Test unclosed things.
test(r#"{"hi"}"#, 4 .. 5, "c", false);
test(r"this \u{abcd}", 8 .. 9, "", true);
test(r"this \u{abcd} that", 12 .. 13, "", false);
test(r"{{let x = z}; a = 1} b", 6 .. 6, "//", false);
// these apparently work but the assertion fails.
// test(r#"a ```typst hello``` b"#, 16 .. 17, "", false);
// test(r#"a ```typst hello```"#, 16 .. 17, "", true);
}
}

View File

@ -49,6 +49,15 @@ impl Green {
self.data().len()
}
/// Overwrite the length stored in this element's data.
///
/// For an inner node the data is reached through `Rc::make_mut`; for a
/// token the data is updated directly.
pub fn set_len(&mut self, len: usize) {
    match self {
        Self::Node(node) => Rc::make_mut(node).data.set_len(len),
        Self::Token(data) => data.set_len(len),
    }
}
/// Whether the node or its children contain an error.
pub fn erroneous(&self) -> bool {
match self {
@ -78,15 +87,15 @@ impl Green {
}
/// Find the innermost child that is incremental safe.
pub fn incremental_int(
fn incremental_int(
&mut self,
edit: &str,
replace: Span,
replacement_len: usize,
offset: usize,
parent_mode: TokenMode,
parent_mode: NodeMode,
outermost: bool,
) -> bool {
) -> Result<(), bool> {
match self {
Green::Node(n) => Rc::make_mut(n).incremental_int(
edit,
@ -96,7 +105,7 @@ impl Green {
parent_mode,
outermost,
),
Green::Token(_) => false,
Green::Token(_) => Err(false),
}
}
@ -202,11 +211,17 @@ impl GreenNode {
/// Find the innermost child that is incremental safe.
pub fn incremental(
&mut self,
edit: &str,
src: &str,
replace: Span,
replacement_len: usize,
) -> bool {
self.incremental_int(edit, replace, replacement_len, 0, TokenMode::Markup, true)
let edit = &src[replace.inserted(replace, replacement_len).to_range()];
if edit.contains("//") || edit.contains("/*") || edit.contains("*/") {
return false;
}
self.incremental_int(src, replace, replacement_len, 0, NodeMode::Markup, true)
.is_ok()
}
fn incremental_int(
@ -215,9 +230,9 @@ impl GreenNode {
replace: Span,
replacement_len: usize,
mut offset: usize,
parent_mode: TokenMode,
parent_mode: NodeMode,
outermost: bool,
) -> bool {
) -> Result<(), bool> {
let kind = self.kind().clone();
let mode = kind.mode().apply(parent_mode);
eprintln!("in {:?} (mode {:?})", kind, mode);
@ -230,30 +245,41 @@ impl GreenNode {
if child_span.surrounds(replace) {
eprintln!("found correct child");
let old_len = child.len();
// First, we try if the child has another, more specific applicable child.
if kind.incremental_safety() != IncrementalSafety::Unsafe
&& child.incremental_int(
if !kind.incremental_safety().unsafe_interior() {
match child.incremental_int(
src,
replace,
replacement_len,
offset,
mode,
kind.mode().child_mode(),
i == last && outermost,
)
{
eprintln!("child was successful");
return true;
) {
Ok(_) => {
eprintln!("child success");
let new_len = child.len();
self.data.set_len(self.data.len() + new_len - old_len);
return Ok(());
}
Err(b) if b => return Err(false),
_ => {}
}
}
// This didn't work, so we try to replace the child at this
// level.
let (function, policy) =
if let Some(p) = child.kind().reparsing_function(mode) {
p
} else {
return false;
};
loop_result = Some((i, child_span, function, policy));
let (function, policy) = match child
.kind()
.reparsing_function(mode.child_mode().as_token_mode())
{
Ok(p) => p,
Err(policy) => {
return Err(policy == IncrementalSafety::VeryUnsafe);
}
};
loop_result =
Some((i, child_span, i == last && outermost, function, policy));
break;
}
@ -264,14 +290,14 @@ impl GreenNode {
// We now have a child that we can replace and a function to do so if
// the loop found any results at all.
let (child_idx, child_span, func, policy) = if let Some(loop_result) = loop_result
{
loop_result
} else {
// No child fully contains the replacement.
eprintln!("no child match");
return false;
};
let (child_idx, child_span, child_outermost, func, policy) =
if let Some(loop_result) = loop_result {
loop_result
} else {
// No child fully contains the replacement.
eprintln!("no child match");
return Err(false);
};
eprintln!("aquired function, policy {:?}", policy);
@ -282,9 +308,10 @@ impl GreenNode {
new_children
} else {
eprintln!("function failed");
return false;
return Err(false);
};
let child_mode = self.children[child_idx].kind().mode().apply(mode);
let child_mode =
self.children[child_idx].kind().mode().child_mode().as_token_mode();
eprintln!("child mode {:?}", child_mode);
// Check if the children / child has the right type.
@ -298,7 +325,7 @@ impl GreenNode {
eprintln!("must be a single replacement");
if new_children.len() != 1 {
eprintln!("not a single replacement");
return false;
return Err(false);
}
if match policy {
@ -310,32 +337,32 @@ impl GreenNode {
} {
if self.children[child_idx].kind() != new_children[0].kind() {
eprintln!("not the same kind");
return false;
return Err(false);
}
}
}
// Do not accept unclosed nodes if the old node did not use to be at the
// right edge of the tree.
if !outermost
if !child_outermost
&& new_children
.iter()
.flat_map(|x| x.errors())
.any(|x| matches!(x, NodeKind::Error(ErrorPos::End, _)))
{
eprintln!("unclosed node");
return false;
return Err(false);
}
// Check if the neighbor invariants are still true.
if mode == TokenMode::Markup {
if mode.as_token_mode() == TokenMode::Markup {
if child_idx > 0 {
if self.children[child_idx - 1].kind().incremental_safety()
== IncrementalSafety::EnsureRightWhitespace
&& !new_children[0].kind().is_whitespace()
{
eprintln!("left whitespace missing");
return false;
return Err(false);
}
}
@ -351,8 +378,12 @@ impl GreenNode {
}
match child.kind().incremental_safety() {
IncrementalSafety::EnsureAtStart if !new_at_start => return false,
IncrementalSafety::EnsureNotAtStart if new_at_start => return false,
IncrementalSafety::EnsureAtStart if !new_at_start => {
return Err(false);
}
IncrementalSafety::EnsureNotAtStart if new_at_start => {
return Err(false);
}
_ => {}
}
break;
@ -361,8 +392,12 @@ impl GreenNode {
eprintln!("... replacing");
let old_len = self.children[child_idx].len();
let new_len: usize = new_children.iter().map(Green::len).sum();
self.children.splice(child_idx .. child_idx + 1, new_children);
true
self.data.set_len(self.data.len + new_len - old_len);
Ok(())
}
}
@ -414,6 +449,11 @@ impl GreenData {
pub fn len(&self) -> usize {
self.len
}
/// Set the length of the node.
///
/// The stored `len` is overwritten with the given value as-is; nothing is
/// validated or recomputed here.
pub fn set_len(&mut self, len: usize) {
self.len = len;
}
}
impl From<GreenData> for Green {
@ -939,24 +979,18 @@ impl NodeKind {
| Self::Escape(_)
| Self::Strong
| Self::Emph
| Self::Raw(_)
| Self::Math(_) => NodeMode::Markup,
Self::Template
| Self::Block
| Self::None
| Self::Auto
| Self::Ident(_)
| Self::Bool(_)
| Self::Int(_)
| Self::Float(_)
| Self::Length(_, _)
| Self::Angle(_, _)
| Self::Percentage(_)
| Self::Str(_)
| Self::Fraction(_)
| Self::Array
| Self::Dict
| Self::Group
| Self::LetExpr
| Self::IfExpr
| Self::WhileExpr
| Self::ForExpr
| Self::ImportExpr
| Self::Call
| Self::IncludeExpr
| Self::LineComment
| Self::BlockComment
| Self::Error(_, _)
@ -969,22 +1003,25 @@ impl NodeKind {
pub fn reparsing_function(
&self,
parent_mode: TokenMode,
) -> Option<(fn(&str, bool) -> Option<Vec<Green>>, IncrementalSafety)> {
) -> Result<
(fn(&str, bool) -> Option<Vec<Green>>, IncrementalSafety),
IncrementalSafety,
> {
let policy = self.incremental_safety();
if policy == IncrementalSafety::Unsafe {
return None;
if policy.unsafe_interior() {
return Err(policy);
}
let mode = self.mode();
if mode == NodeMode::Code && policy == IncrementalSafety::UnsafeLayer {
return None;
return Err(policy);
}
if mode != NodeMode::Markup
&& parent_mode == TokenMode::Code
&& policy == IncrementalSafety::AtomicPrimary
{
return Some((parse_atomic, policy));
return Ok((parse_atomic, policy));
}
let parser: fn(&str, bool) -> _ = match mode {
@ -995,7 +1032,7 @@ impl NodeKind {
NodeMode::Universal => parse_markup_elements,
};
Some((parser, policy))
Ok((parser, policy))
}
/// Whether it is safe to do incremental parsing on this node. Never allow
@ -1042,7 +1079,8 @@ impl NodeKind {
// other expressions.
Self::None | Self::Auto => IncrementalSafety::AtomicPrimary,
// These keywords change what kind of expression the parent is.
// These keywords change what kind of expression the parent is and
// how far the expression would go.
Self::Let
| Self::If
| Self::Else
@ -1055,7 +1093,7 @@ impl NodeKind {
| Self::Set
| Self::Import
| Self::Include
| Self::From => IncrementalSafety::Unsafe,
| Self::From => IncrementalSafety::VeryUnsafe,
// This is a backslash followed by a space. But changing it to
// anything else is fair game.
@ -1309,6 +1347,17 @@ pub enum IncrementalSafety {
/// Changing an unsafe node or any of its children will trigger undefined
/// behavior. Change the parents instead.
Unsafe,
/// It's unsafe for two levels: neither this node nor its parent may be
/// reparsed on its own.
VeryUnsafe,
}
impl IncrementalSafety {
    /// Whether this policy is one of the unsafe ones.
    ///
    /// Returns `true` for [`Self::Unsafe`] and [`Self::VeryUnsafe`] and
    /// `false` for every other policy.
    pub fn unsafe_interior(&self) -> bool {
        matches!(self, Self::Unsafe | Self::VeryUnsafe)
    }
}
/// This enum describes which mode a token of [`NodeKind`] can appear in.
@ -1319,17 +1368,34 @@ pub enum NodeMode {
/// The token can only appear in code mode.
Code,
/// The token can appear in either mode. Look at the parent node to decide
/// which mode it is in.
/// which mode it is in. After an apply, this is equivalent to Markup.
Universal,
}
impl NodeMode {
/// Returns the new [`TokenMode`] given the old one.
pub fn apply(&self, old: TokenMode) -> TokenMode {
/// Returns a new mode considering the parent node.
pub fn apply(&self, old: Self) -> Self {
match self {
Self::Markup => TokenMode::Markup,
Self::Markup => Self::Markup,
Self::Code => Self::Code,
Self::Universal if old != Self::Markup => Self::Code,
Self::Universal => Self::Universal,
}
}
/// Return the corresponding token mode.
pub fn as_token_mode(&self) -> TokenMode {
match self {
Self::Markup | Self::Universal => TokenMode::Markup,
Self::Code => TokenMode::Code,
Self::Universal => old,
}
}
/// Determine the mode that applies to the children of a node in this mode.
///
/// Markup stays markup; both code and universal yield code.
pub fn child_mode(&self) -> Self {
    match *self {
        Self::Code => Self::Code,
        Self::Universal => Self::Code,
        Self::Markup => Self::Markup,
    }
}
}