From 1c0ac793d2b9c403f1a8fa60a3748f4ff8623acb Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Sun, 31 Oct 2021 15:01:39 +0100 Subject: [PATCH] Slim `NodeKind` memory footprint --- src/eval/mod.rs | 3 +- src/parse/mod.rs | 39 +++------ src/parse/resolve.rs | 11 +-- src/parse/tokens.rs | 184 ++++++++++++++++++++++++++------------- src/syntax/mod.rs | 6 +- src/syntax/token.rs | 22 +---- tests/typ/markup/raw.typ | 2 +- 7 files changed, 142 insertions(+), 125 deletions(-) diff --git a/src/eval/mod.rs b/src/eval/mod.rs index 8d31c1774..ba266ea58 100644 --- a/src/eval/mod.rs +++ b/src/eval/mod.rs @@ -30,7 +30,6 @@ use std::collections::HashMap; use std::io; use std::mem; use std::path::PathBuf; -use std::rc::Rc; use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult}; use crate::geom::{Angle, Fractional, Length, Relative}; @@ -475,7 +474,7 @@ impl Eval for ClosureExpr { // Clone the body expression so that we don't have a lifetime // dependence on the AST. - let body = Rc::new(self.body()); + let body = self.body().clone(); // Define the actual function. let func = Function::new(name, move |ctx, args| { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index 0425f8248..773f642c8 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::NonBreakingSpace | NodeKind::Emph | NodeKind::Strong - | NodeKind::Linebreak => p.eat(), + | NodeKind::Linebreak + | NodeKind::Raw(_) => p.eat(), NodeKind::UnicodeEscape(u) => { - if !u.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected closing brace".into(), - )); - p.unsuccessful(); - return; - } - if u.character.is_none() { let src = p.peek_src(); p.convert(NodeKind::Error( @@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { p.eat(); } - NodeKind::Raw(r) => { - if !r.terminated { - p.convert(NodeKind::Error( - ErrorPosition::End, - "expected backtick(s)".into(), - )); - p.unsuccessful(); - return; - } - - p.eat(); - } NodeKind::Eq if *at_start => heading(p), NodeKind::ListBullet if *at_start => list_node(p), @@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { // Comments. NodeKind::LineComment | NodeKind::BlockComment => p.eat(), + NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(), _ => { *at_start = false; @@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) { Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Include) => include_expr(p), + Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => { + p.eat(); + } + // Nothing. _ => { p.expected("expression"); @@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool { | NodeKind::Fraction(_) | NodeKind::Length(_, _) | NodeKind::Angle(_, _) - | NodeKind::Percentage(_) => p.eat(), - NodeKind::Str(s) => { - p.eat(); - if !s.terminated { - p.expected_at("quote"); - } - } + | NodeKind::Percentage(_) + | NodeKind::Str(_) => p.eat(), + _ => return false, } diff --git a/src/parse/resolve.rs b/src/parse/resolve.rs index 1b3089a63..8d4c04d49 100644 --- a/src/parse/resolve.rs +++ b/src/parse/resolve.rs @@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option { } /// Resolve the language tag and trims the raw text. -pub fn resolve_raw( - column: usize, - backticks: u8, - text: &str, - terminated: bool, -) -> RawToken { +pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken { if backticks > 1 { let (tag, inner) = split_at_lang_tag(text); let (text, block) = trim_and_split_raw(column, inner); @@ -59,7 +54,6 @@ pub fn resolve_raw( lang: Some(tag.into()), text: text.into(), backticks, - terminated, block, } } else { @@ -67,7 +61,6 @@ pub fn resolve_raw( lang: None, text: split_lines(text).join("\n").into(), backticks, - terminated, block: false, } } @@ -194,7 +187,7 @@ mod tests { text: &str, block: bool, ) { - let node = resolve_raw(column, backticks, raw, true); + let node = resolve_raw(column, backticks, raw); assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.text, text); assert_eq!(node.block, block); diff --git a/src/parse/tokens.rs b/src/parse/tokens.rs index bfd9f3ed1..8a480b02c 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -5,6 +5,8 @@ use crate::source::SourceFile; use crate::syntax::*; use crate::util::EcoString; +use std::rc::Rc; + /// An iterator over the tokens of a string of source code. pub struct Tokens<'s> { source: &'s SourceFile, @@ -239,11 +241,18 @@ impl<'s> Tokens<'s> { self.s.eat_assert('u'); self.s.eat_assert('{'); let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(&sequence), - sequence, - terminated: self.s.eat_if('}') - }) + + if self.s.eat_if('}') { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(&sequence), + sequence, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + "expected closing brace".into(), + ) + } } c if c.is_whitespace() => NodeKind::Linebreak, _ => NodeKind::Text("\\".into()), @@ -307,13 +316,12 @@ impl<'s> Tokens<'s> { // Special case for empty inline block. if backticks == 2 { - return NodeKind::Raw(RawToken { + return NodeKind::Raw(Rc::new(RawToken { text: EcoString::new(), lang: None, backticks: 1, - terminated: true, block: false, - }); + })); } let start = self.s.index(); @@ -330,12 +338,26 @@ impl<'s> Tokens<'s> { let terminated = found == backticks; let end = self.s.index() - if terminated { found as usize } else { 0 }; - NodeKind::Raw(resolve_raw( - column, - backticks, - self.s.get(start .. end).into(), - terminated, - )) + if terminated { + NodeKind::Raw(Rc::new(resolve_raw( + column, + backticks, + self.s.get(start .. end).into(), + ))) + } else { + let remaining = backticks - found; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + ErrorPosition::End, + if found == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn math(&mut self) -> NodeKind { @@ -368,11 +390,22 @@ impl<'s> Tokens<'s> { (true, true) => 2, }; - NodeKind::Math(MathToken { - formula: self.s.get(start .. end).into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { + formula: self.s.get(start .. end).into(), + display, + })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn ident(&mut self, start: usize) -> NodeKind { @@ -444,17 +477,19 @@ impl<'s> Tokens<'s> { fn string(&mut self) -> NodeKind { let mut escaped = false; - NodeKind::Str(StrToken { - string: resolve_string(self.s.eat_until(|c| { - if c == '"' && !escaped { - true - } else { - escaped = c == '\\' && !escaped; - false - } - })), - terminated: self.s.eat_if('"'), - }) + let string = resolve_string(self.s.eat_until(|c| { + if c == '"' && !escaped { + true + } else { + escaped = c == '\\' && !escaped; + false + } + })); + if self.s.eat_if('"') { + NodeKind::Str(StrToken { string }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn line_comment(&mut self) -> NodeKind { @@ -526,39 +561,68 @@ mod tests { use TokenMode::{Code, Markup}; fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { - NodeKind::UnicodeEscape(UnicodeEscapeToken { - character: resolve_hex(sequence), - sequence: sequence.into(), - terminated, - }) + if terminated { + NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken { + character: resolve_hex(sequence), + sequence: sequence.into(), + })) + } else { + NodeKind::Error(ErrorPosition::End, "expected closing brace".into()) + } } fn Raw( text: &str, lang: Option<&str>, - backticks: u8, - terminated: bool, + backticks_left: u8, + backticks_right: u8, block: bool, ) -> NodeKind { - NodeKind::Raw(RawToken { - text: text.into(), - lang: lang.map(Into::into), - backticks, - terminated, - block, - }) + if backticks_left == backticks_right { + NodeKind::Raw(Rc::new(RawToken { + text: text.into(), + lang: lang.map(Into::into), + backticks: backticks_left, + block, + })) + } else { + let remaining = backticks_left - backticks_right; + let noun = if remaining == 1 { "backtick" } else { "backticks" }; + + NodeKind::Error( + ErrorPosition::End, + if backticks_right == 0 { + format!("expected {} {}", remaining, noun) + } else { + format!("expected {} more {}", remaining, noun) + } + .into(), + ) + } } fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { - NodeKind::Math(MathToken { - formula: formula.into(), - display, - terminated, - }) + if terminated { + NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display })) + } else { + NodeKind::Error( + ErrorPosition::End, + if display { + "expected closing dollar sign" + } else { + "expected display math closure sequence" + } + .into(), + ) + } } fn Str(string: &str, terminated: bool) -> NodeKind { - NodeKind::Str(StrToken { string: string.into(), terminated }) + if terminated { + NodeKind::Str(StrToken { string: string.into() }) + } else { + NodeKind::Error(ErrorPosition::End, "expected quote".into()) + } } fn Text(string: &str) -> NodeKind { @@ -844,22 +908,22 @@ mod tests { #[test] fn test_tokenize_raw_blocks() { // Test basic raw block. - t!(Markup: "``" => Raw("", None, 1, true, false)); - t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); - t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); + t!(Markup: "``" => Raw("", None, 1, 1, false)); + t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false)); + t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false)); // Test special symbols in raw block. - t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); - t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); + t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false)); + t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false)); // Test separated closing backticks. - t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); + t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false)); // Test more backticks. - t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); - t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); - t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); - t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); + t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false)); + t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false)); + t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false)); + t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false)); } #[test] diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 8e04a569f..ca5b6a1b6 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -121,12 +121,12 @@ pub enum NodeKind { Text(EcoString), /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. - UnicodeEscape(UnicodeEscapeToken), + UnicodeEscape(Rc), /// An arbitrary number of backticks followed by inner contents, terminated /// with the same number of backticks: `` `...` ``. - Raw(RawToken), + Raw(Rc), /// Dollar signs surrounding inner contents. - Math(MathToken), + Math(Rc), /// A numbering: `23.`. /// /// Can also exist without the number: `.`. diff --git a/src/syntax/token.rs b/src/syntax/token.rs index 49613667e..5a6214958 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -2,15 +2,10 @@ use crate::util::EcoString; /// A quoted string token: `"..."`. #[derive(Debug, Clone, PartialEq)] +#[repr(transparent)] pub struct StrToken { /// The string inside the quotes. - /// - /// _Note_: If the string contains escape sequences these are not yet - /// applied to be able to just store a string slice here instead of - /// a `String`. The resolving is done later in the parser. pub string: EcoString, - /// Whether the closing quote was present. - pub terminated: bool, } /// A raw block token: `` `...` ``. @@ -22,8 +17,6 @@ pub struct RawToken { pub lang: Option, /// The number of opening backticks. pub backticks: u8, - /// Whether all closing backticks were present. - pub terminated: bool, /// Whether to display this as a block. pub block: bool, } @@ -36,8 +29,6 @@ pub struct MathToken { /// Whether the formula is display-level, that is, it is surrounded by /// `$[..]`. pub display: bool, - /// Whether the closing dollars were present. - pub terminated: bool, } /// A unicode escape sequence token: `\u{1F5FA}`. @@ -47,15 +38,4 @@ pub struct UnicodeEscapeToken { pub sequence: EcoString, /// The resulting unicode character. pub character: Option, - /// Whether the closing brace was present. - pub terminated: bool, -} - -/// A unit-bound number token: `1.2em`. -#[derive(Debug, Clone, PartialEq)] -pub struct UnitToken { - /// The number part. - pub number: std::ops::Range, - /// The unit part. - pub unit: std::ops::Range, } diff --git a/tests/typ/markup/raw.typ b/tests/typ/markup/raw.typ index d48432f73..0e053a9b3 100644 --- a/tests/typ/markup/raw.typ +++ b/tests/typ/markup/raw.typ @@ -55,5 +55,5 @@ The keyword ```rust let```. --- // Unterminated. -// Error: 2:1 expected backtick(s) +// Error: 2:1 expected 1 backtick `endless