Slim NodeKind memory footprint

This commit is contained in:
Martin Haug 2021-10-31 15:01:39 +01:00
parent 84d35efee3
commit 1c0ac793d2
7 changed files with 142 additions and 125 deletions

View File

@ -30,7 +30,6 @@ use std::collections::HashMap;
use std::io; use std::io;
use std::mem; use std::mem;
use std::path::PathBuf; use std::path::PathBuf;
use std::rc::Rc;
use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult}; use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult};
use crate::geom::{Angle, Fractional, Length, Relative}; use crate::geom::{Angle, Fractional, Length, Relative};
@ -475,7 +474,7 @@ impl Eval for ClosureExpr {
// Clone the body expression so that we don't have a lifetime // Clone the body expression so that we don't have a lifetime
// dependence on the AST. // dependence on the AST.
let body = Rc::new(self.body()); let body = self.body().clone();
// Define the actual function. // Define the actual function.
let func = Function::new(name, move |ctx, args| { let func = Function::new(name, move |ctx, args| {

View File

@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::NonBreakingSpace | NodeKind::NonBreakingSpace
| NodeKind::Emph | NodeKind::Emph
| NodeKind::Strong | NodeKind::Strong
| NodeKind::Linebreak => p.eat(), | NodeKind::Linebreak
| NodeKind::Raw(_) => p.eat(),
NodeKind::UnicodeEscape(u) => { NodeKind::UnicodeEscape(u) => {
if !u.terminated {
p.convert(NodeKind::Error(
ErrorPosition::End,
"expected closing brace".into(),
));
p.unsuccessful();
return;
}
if u.character.is_none() { if u.character.is_none() {
let src = p.peek_src(); let src = p.peek_src();
p.convert(NodeKind::Error( p.convert(NodeKind::Error(
@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
p.eat(); p.eat();
} }
NodeKind::Raw(r) => {
if !r.terminated {
p.convert(NodeKind::Error(
ErrorPosition::End,
"expected backtick(s)".into(),
));
p.unsuccessful();
return;
}
p.eat();
}
NodeKind::Eq if *at_start => heading(p), NodeKind::Eq if *at_start => heading(p),
NodeKind::ListBullet if *at_start => list_node(p), NodeKind::ListBullet if *at_start => list_node(p),
@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
// Comments. // Comments.
NodeKind::LineComment | NodeKind::BlockComment => p.eat(), NodeKind::LineComment | NodeKind::BlockComment => p.eat(),
NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(),
_ => { _ => {
*at_start = false; *at_start = false;
@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) {
Some(NodeKind::Import) => import_expr(p), Some(NodeKind::Import) => import_expr(p),
Some(NodeKind::Include) => include_expr(p), Some(NodeKind::Include) => include_expr(p),
Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => {
p.eat();
}
// Nothing. // Nothing.
_ => { _ => {
p.expected("expression"); p.expected("expression");
@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool {
| NodeKind::Fraction(_) | NodeKind::Fraction(_)
| NodeKind::Length(_, _) | NodeKind::Length(_, _)
| NodeKind::Angle(_, _) | NodeKind::Angle(_, _)
| NodeKind::Percentage(_) => p.eat(), | NodeKind::Percentage(_)
NodeKind::Str(s) => { | NodeKind::Str(_) => p.eat(),
p.eat();
if !s.terminated {
p.expected_at("quote");
}
}
_ => return false, _ => return false,
} }

View File

@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
} }
/// Resolve the language tag and trim the raw text. /// Resolve the language tag and trim the raw text.
pub fn resolve_raw( pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
column: usize,
backticks: u8,
text: &str,
terminated: bool,
) -> RawToken {
if backticks > 1 { if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text); let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner); let (text, block) = trim_and_split_raw(column, inner);
@ -59,7 +54,6 @@ pub fn resolve_raw(
lang: Some(tag.into()), lang: Some(tag.into()),
text: text.into(), text: text.into(),
backticks, backticks,
terminated,
block, block,
} }
} else { } else {
@ -67,7 +61,6 @@ pub fn resolve_raw(
lang: None, lang: None,
text: split_lines(text).join("\n").into(), text: split_lines(text).join("\n").into(),
backticks, backticks,
terminated,
block: false, block: false,
} }
} }
@ -194,7 +187,7 @@ mod tests {
text: &str, text: &str,
block: bool, block: bool,
) { ) {
let node = resolve_raw(column, backticks, raw, true); let node = resolve_raw(column, backticks, raw);
assert_eq!(node.lang.as_deref(), lang); assert_eq!(node.lang.as_deref(), lang);
assert_eq!(node.text, text); assert_eq!(node.text, text);
assert_eq!(node.block, block); assert_eq!(node.block, block);

View File

@ -5,6 +5,8 @@ use crate::source::SourceFile;
use crate::syntax::*; use crate::syntax::*;
use crate::util::EcoString; use crate::util::EcoString;
use std::rc::Rc;
/// An iterator over the tokens of a string of source code. /// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> { pub struct Tokens<'s> {
source: &'s SourceFile, source: &'s SourceFile,
@ -239,11 +241,18 @@ impl<'s> Tokens<'s> {
self.s.eat_assert('u'); self.s.eat_assert('u');
self.s.eat_assert('{'); self.s.eat_assert('{');
let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into(); let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
NodeKind::UnicodeEscape(UnicodeEscapeToken {
if self.s.eat_if('}') {
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
character: resolve_hex(&sequence), character: resolve_hex(&sequence),
sequence, sequence,
terminated: self.s.eat_if('}') }))
}) } else {
NodeKind::Error(
ErrorPosition::End,
"expected closing brace".into(),
)
}
} }
c if c.is_whitespace() => NodeKind::Linebreak, c if c.is_whitespace() => NodeKind::Linebreak,
_ => NodeKind::Text("\\".into()), _ => NodeKind::Text("\\".into()),
@ -307,13 +316,12 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block. // Special case for empty inline block.
if backticks == 2 { if backticks == 2 {
return NodeKind::Raw(RawToken { return NodeKind::Raw(Rc::new(RawToken {
text: EcoString::new(), text: EcoString::new(),
lang: None, lang: None,
backticks: 1, backticks: 1,
terminated: true,
block: false, block: false,
}); }));
} }
let start = self.s.index(); let start = self.s.index();
@ -330,12 +338,26 @@ impl<'s> Tokens<'s> {
let terminated = found == backticks; let terminated = found == backticks;
let end = self.s.index() - if terminated { found as usize } else { 0 }; let end = self.s.index() - if terminated { found as usize } else { 0 };
NodeKind::Raw(resolve_raw( if terminated {
NodeKind::Raw(Rc::new(resolve_raw(
column, column,
backticks, backticks,
self.s.get(start .. end).into(), self.s.get(start .. end).into(),
terminated, )))
)) } else {
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
NodeKind::Error(
ErrorPosition::End,
if found == 0 {
format!("expected {} {}", remaining, noun)
} else {
format!("expected {} more {}", remaining, noun)
}
.into(),
)
}
} }
fn math(&mut self) -> NodeKind { fn math(&mut self) -> NodeKind {
@ -368,11 +390,22 @@ impl<'s> Tokens<'s> {
(true, true) => 2, (true, true) => 2,
}; };
NodeKind::Math(MathToken { if terminated {
NodeKind::Math(Rc::new(MathToken {
formula: self.s.get(start .. end).into(), formula: self.s.get(start .. end).into(),
display, display,
terminated, }))
}) } else {
NodeKind::Error(
ErrorPosition::End,
if display {
"expected closing dollar sign"
} else {
"expected display math closure sequence"
}
.into(),
)
}
} }
fn ident(&mut self, start: usize) -> NodeKind { fn ident(&mut self, start: usize) -> NodeKind {
@ -444,17 +477,19 @@ impl<'s> Tokens<'s> {
fn string(&mut self) -> NodeKind { fn string(&mut self) -> NodeKind {
let mut escaped = false; let mut escaped = false;
NodeKind::Str(StrToken { let string = resolve_string(self.s.eat_until(|c| {
string: resolve_string(self.s.eat_until(|c| {
if c == '"' && !escaped { if c == '"' && !escaped {
true true
} else { } else {
escaped = c == '\\' && !escaped; escaped = c == '\\' && !escaped;
false false
} }
})), }));
terminated: self.s.eat_if('"'), if self.s.eat_if('"') {
}) NodeKind::Str(StrToken { string })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
} }
fn line_comment(&mut self) -> NodeKind { fn line_comment(&mut self) -> NodeKind {
@ -526,39 +561,68 @@ mod tests {
use TokenMode::{Code, Markup}; use TokenMode::{Code, Markup};
fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind { fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind {
NodeKind::UnicodeEscape(UnicodeEscapeToken { if terminated {
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
character: resolve_hex(sequence), character: resolve_hex(sequence),
sequence: sequence.into(), sequence: sequence.into(),
terminated, }))
}) } else {
NodeKind::Error(ErrorPosition::End, "expected closing brace".into())
}
} }
fn Raw( fn Raw(
text: &str, text: &str,
lang: Option<&str>, lang: Option<&str>,
backticks: u8, backticks_left: u8,
terminated: bool, backticks_right: u8,
block: bool, block: bool,
) -> NodeKind { ) -> NodeKind {
NodeKind::Raw(RawToken { if backticks_left == backticks_right {
NodeKind::Raw(Rc::new(RawToken {
text: text.into(), text: text.into(),
lang: lang.map(Into::into), lang: lang.map(Into::into),
backticks, backticks: backticks_left,
terminated,
block, block,
}) }))
} else {
let remaining = backticks_left - backticks_right;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
NodeKind::Error(
ErrorPosition::End,
if backticks_right == 0 {
format!("expected {} {}", remaining, noun)
} else {
format!("expected {} more {}", remaining, noun)
}
.into(),
)
}
} }
fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind { fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind {
NodeKind::Math(MathToken { if terminated {
formula: formula.into(), NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
display, } else {
terminated, NodeKind::Error(
}) ErrorPosition::End,
if display {
"expected closing dollar sign"
} else {
"expected display math closure sequence"
}
.into(),
)
}
} }
fn Str(string: &str, terminated: bool) -> NodeKind { fn Str(string: &str, terminated: bool) -> NodeKind {
NodeKind::Str(StrToken { string: string.into(), terminated }) if terminated {
NodeKind::Str(StrToken { string: string.into() })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
} }
fn Text(string: &str) -> NodeKind { fn Text(string: &str) -> NodeKind {
@ -844,22 +908,22 @@ mod tests {
#[test] #[test]
fn test_tokenize_raw_blocks() { fn test_tokenize_raw_blocks() {
// Test basic raw block. // Test basic raw block.
t!(Markup: "``" => Raw("", None, 1, true, false)); t!(Markup: "``" => Raw("", None, 1, 1, false));
t!(Markup: "`raw`" => Raw("raw", None, 1, true, false)); t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false));
t!(Markup[""]: "`]" => Raw("]", None, 1, false, false)); t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false));
// Test special symbols in raw block. // Test special symbols in raw block.
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false)); t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false));
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false)); t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false));
// Test separated closing backticks. // Test separated closing backticks.
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false)); t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false));
// Test more backticks. // Test more backticks.
t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false)); t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false));
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false)); t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false));
t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false)); t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false));
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false)); t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false));
} }
#[test] #[test]

View File

@ -121,12 +121,12 @@ pub enum NodeKind {
Text(EcoString), Text(EcoString),
/// A backslash and the letter "u" followed by a hexadecimal unicode entity /// A backslash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`. /// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken), UnicodeEscape(Rc<UnicodeEscapeToken>),
/// An arbitrary number of backticks followed by inner contents, terminated /// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``. /// with the same number of backticks: `` `...` ``.
Raw(RawToken), Raw(Rc<RawToken>),
/// Dollar signs surrounding inner contents. /// Dollar signs surrounding inner contents.
Math(MathToken), Math(Rc<MathToken>),
/// A numbering: `23.`. /// A numbering: `23.`.
/// ///
/// Can also exist without the number: `.`. /// Can also exist without the number: `.`.

View File

@ -2,15 +2,10 @@ use crate::util::EcoString;
/// A quoted string token: `"..."`. /// A quoted string token: `"..."`.
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct StrToken { pub struct StrToken {
/// The string inside the quotes. /// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences these are not yet
/// applied to be able to just store a string slice here instead of
/// a `String`. The resolving is done later in the parser.
pub string: EcoString, pub string: EcoString,
/// Whether the closing quote was present.
pub terminated: bool,
} }
/// A raw block token: `` `...` ``. /// A raw block token: `` `...` ``.
@ -22,8 +17,6 @@ pub struct RawToken {
pub lang: Option<EcoString>, pub lang: Option<EcoString>,
/// The number of opening backticks. /// The number of opening backticks.
pub backticks: u8, pub backticks: u8,
/// Whether all closing backticks were present.
pub terminated: bool,
/// Whether to display this as a block. /// Whether to display this as a block.
pub block: bool, pub block: bool,
} }
@ -36,8 +29,6 @@ pub struct MathToken {
/// Whether the formula is display-level, that is, it is surrounded by /// Whether the formula is display-level, that is, it is surrounded by
/// `$[..]`. /// `$[..]`.
pub display: bool, pub display: bool,
/// Whether the closing dollars were present.
pub terminated: bool,
} }
/// A unicode escape sequence token: `\u{1F5FA}`. /// A unicode escape sequence token: `\u{1F5FA}`.
@ -47,15 +38,4 @@ pub struct UnicodeEscapeToken {
pub sequence: EcoString, pub sequence: EcoString,
/// The resulting unicode character. /// The resulting unicode character.
pub character: Option<char>, pub character: Option<char>,
/// Whether the closing brace was present.
pub terminated: bool,
}
/// A unit-bound number token: `1.2em`.
#[derive(Debug, Clone, PartialEq)]
pub struct UnitToken {
/// The number part.
pub number: std::ops::Range<usize>,
/// The unit part.
pub unit: std::ops::Range<usize>,
} }

View File

@ -55,5 +55,5 @@ The keyword ```rust let```.
--- ---
// Unterminated. // Unterminated.
// Error: 2:1 expected backtick(s) // Error: 2:1 expected 1 backtick
`endless `endless