Slim NodeKind memory footprint

This commit is contained in:
Martin Haug 2021-10-31 15:01:39 +01:00
parent 84d35efee3
commit 1c0ac793d2
7 changed files with 142 additions and 125 deletions

View File

@ -30,7 +30,6 @@ use std::collections::HashMap;
use std::io;
use std::mem;
use std::path::PathBuf;
use std::rc::Rc;
use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult};
use crate::geom::{Angle, Fractional, Length, Relative};
@ -475,7 +474,7 @@ impl Eval for ClosureExpr {
// Clone the body expression so that we don't have a lifetime
// dependence on the AST.
let body = Rc::new(self.body());
let body = self.body().clone();
// Define the actual function.
let func = Function::new(name, move |ctx, args| {

View File

@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::NonBreakingSpace
| NodeKind::Emph
| NodeKind::Strong
| NodeKind::Linebreak => p.eat(),
| NodeKind::Linebreak
| NodeKind::Raw(_) => p.eat(),
NodeKind::UnicodeEscape(u) => {
if !u.terminated {
p.convert(NodeKind::Error(
ErrorPosition::End,
"expected closing brace".into(),
));
p.unsuccessful();
return;
}
if u.character.is_none() {
let src = p.peek_src();
p.convert(NodeKind::Error(
@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
p.eat();
}
NodeKind::Raw(r) => {
if !r.terminated {
p.convert(NodeKind::Error(
ErrorPosition::End,
"expected backtick(s)".into(),
));
p.unsuccessful();
return;
}
p.eat();
}
NodeKind::Eq if *at_start => heading(p),
NodeKind::ListBullet if *at_start => list_node(p),
@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
// Comments.
NodeKind::LineComment | NodeKind::BlockComment => p.eat(),
NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(),
_ => {
*at_start = false;
@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) {
Some(NodeKind::Import) => import_expr(p),
Some(NodeKind::Include) => include_expr(p),
Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => {
p.eat();
}
// Nothing.
_ => {
p.expected("expression");
@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool {
| NodeKind::Fraction(_)
| NodeKind::Length(_, _)
| NodeKind::Angle(_, _)
| NodeKind::Percentage(_) => p.eat(),
NodeKind::Str(s) => {
p.eat();
if !s.terminated {
p.expected_at("quote");
}
}
| NodeKind::Percentage(_)
| NodeKind::Str(_) => p.eat(),
_ => return false,
}

View File

@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trim the raw text.
pub fn resolve_raw(
column: usize,
backticks: u8,
text: &str,
terminated: bool,
) -> RawToken {
pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
@ -59,7 +54,6 @@ pub fn resolve_raw(
lang: Some(tag.into()),
text: text.into(),
backticks,
terminated,
block,
}
} else {
@ -67,7 +61,6 @@ pub fn resolve_raw(
lang: None,
text: split_lines(text).join("\n").into(),
backticks,
terminated,
block: false,
}
}
@ -194,7 +187,7 @@ mod tests {
text: &str,
block: bool,
) {
let node = resolve_raw(column, backticks, raw, true);
let node = resolve_raw(column, backticks, raw);
assert_eq!(node.lang.as_deref(), lang);
assert_eq!(node.text, text);
assert_eq!(node.block, block);

View File

@ -5,6 +5,8 @@ use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
use std::rc::Rc;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
source: &'s SourceFile,
@ -239,11 +241,18 @@ impl<'s> Tokens<'s> {
self.s.eat_assert('u');
self.s.eat_assert('{');
let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
NodeKind::UnicodeEscape(UnicodeEscapeToken {
if self.s.eat_if('}') {
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
character: resolve_hex(&sequence),
sequence,
terminated: self.s.eat_if('}')
})
}))
} else {
NodeKind::Error(
ErrorPosition::End,
"expected closing brace".into(),
)
}
}
c if c.is_whitespace() => NodeKind::Linebreak,
_ => NodeKind::Text("\\".into()),
@ -307,13 +316,12 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block.
if backticks == 2 {
return NodeKind::Raw(RawToken {
return NodeKind::Raw(Rc::new(RawToken {
text: EcoString::new(),
lang: None,
backticks: 1,
terminated: true,
block: false,
});
}));
}
let start = self.s.index();
@ -330,12 +338,26 @@ impl<'s> Tokens<'s> {
let terminated = found == backticks;
let end = self.s.index() - if terminated { found as usize } else { 0 };
NodeKind::Raw(resolve_raw(
if terminated {
NodeKind::Raw(Rc::new(resolve_raw(
column,
backticks,
self.s.get(start .. end).into(),
terminated,
))
)))
} else {
let remaining = backticks - found;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
NodeKind::Error(
ErrorPosition::End,
if found == 0 {
format!("expected {} {}", remaining, noun)
} else {
format!("expected {} more {}", remaining, noun)
}
.into(),
)
}
}
fn math(&mut self) -> NodeKind {
@ -368,11 +390,22 @@ impl<'s> Tokens<'s> {
(true, true) => 2,
};
NodeKind::Math(MathToken {
if terminated {
NodeKind::Math(Rc::new(MathToken {
formula: self.s.get(start .. end).into(),
display,
terminated,
})
}))
} else {
NodeKind::Error(
ErrorPosition::End,
if display {
"expected closing dollar sign"
} else {
"expected display math closure sequence"
}
.into(),
)
}
}
fn ident(&mut self, start: usize) -> NodeKind {
@ -444,17 +477,19 @@ impl<'s> Tokens<'s> {
fn string(&mut self) -> NodeKind {
let mut escaped = false;
NodeKind::Str(StrToken {
string: resolve_string(self.s.eat_until(|c| {
let string = resolve_string(self.s.eat_until(|c| {
if c == '"' && !escaped {
true
} else {
escaped = c == '\\' && !escaped;
false
}
})),
terminated: self.s.eat_if('"'),
})
}));
if self.s.eat_if('"') {
NodeKind::Str(StrToken { string })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
}
fn line_comment(&mut self) -> NodeKind {
@ -526,39 +561,68 @@ mod tests {
use TokenMode::{Code, Markup};
fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind {
NodeKind::UnicodeEscape(UnicodeEscapeToken {
if terminated {
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
character: resolve_hex(sequence),
sequence: sequence.into(),
terminated,
})
}))
} else {
NodeKind::Error(ErrorPosition::End, "expected closing brace".into())
}
}
fn Raw(
text: &str,
lang: Option<&str>,
backticks: u8,
terminated: bool,
backticks_left: u8,
backticks_right: u8,
block: bool,
) -> NodeKind {
NodeKind::Raw(RawToken {
if backticks_left == backticks_right {
NodeKind::Raw(Rc::new(RawToken {
text: text.into(),
lang: lang.map(Into::into),
backticks,
terminated,
backticks: backticks_left,
block,
})
}))
} else {
let remaining = backticks_left - backticks_right;
let noun = if remaining == 1 { "backtick" } else { "backticks" };
NodeKind::Error(
ErrorPosition::End,
if backticks_right == 0 {
format!("expected {} {}", remaining, noun)
} else {
format!("expected {} more {}", remaining, noun)
}
.into(),
)
}
}
// Test helper: the token expected for a math formula. In this rendering the
// pre-change body (an unconditional `NodeKind::Math(MathToken { .. })`) and
// the post-change body (`if terminated { .. } else { .. }`) appear back to
// back.
fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind {
NodeKind::Math(MathToken {
formula: formula.into(),
display,
terminated,
})
if terminated {
NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
} else {
// An unterminated formula is expected as an error positioned at the end.
NodeKind::Error(
ErrorPosition::End,
// NOTE(review): these two messages look swapped relative to `display`:
// `display == true` selects "expected closing dollar sign", while the
// non-display case selects the "display math closure sequence" text.
// Confirm against the tokenizer, which must stay in sync with this helper.
if display {
"expected closing dollar sign"
} else {
"expected display math closure sequence"
}
.into(),
)
}
}
fn Str(string: &str, terminated: bool) -> NodeKind {
NodeKind::Str(StrToken { string: string.into(), terminated })
if terminated {
NodeKind::Str(StrToken { string: string.into() })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
}
fn Text(string: &str) -> NodeKind {
@ -844,22 +908,22 @@ mod tests {
#[test]
fn test_tokenize_raw_blocks() {
// Test basic raw block.
t!(Markup: "``" => Raw("", None, 1, true, false));
t!(Markup: "`raw`" => Raw("raw", None, 1, true, false));
t!(Markup[""]: "`]" => Raw("]", None, 1, false, false));
t!(Markup: "``" => Raw("", None, 1, 1, false));
t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false));
t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false));
// Test special symbols in raw block.
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false));
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false));
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false));
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false));
// Test separated closing backticks.
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false));
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false));
// Test more backticks.
t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false));
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false));
t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, false, false));
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false));
t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false));
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false));
t!(Markup[""]: "`````👩‍🚀````noend" => Raw("````noend", Some("👩‍🚀"), 5, 0, false));
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false));
}
#[test]

View File

@ -121,12 +121,12 @@ pub enum NodeKind {
Text(EcoString),
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken),
UnicodeEscape(Rc<UnicodeEscapeToken>),
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(RawToken),
Raw(Rc<RawToken>),
/// Dollar signs surrounding inner contents.
Math(MathToken),
Math(Rc<MathToken>),
/// A numbering: `23.`.
///
/// Can also exist without the number: `.`.

View File

@ -2,15 +2,10 @@ use crate::util::EcoString;
/// A quoted string token: `"..."`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct StrToken {
/// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences these are not yet
/// applied to be able to just store a string slice here instead of
/// a `String`. The resolving is done later in the parser.
pub string: EcoString,
/// Whether the closing quote was present.
pub terminated: bool,
}
/// A raw block token: `` `...` ``.
@ -22,8 +17,6 @@ pub struct RawToken {
pub lang: Option<EcoString>,
/// The number of opening backticks.
pub backticks: u8,
/// Whether all closing backticks were present.
pub terminated: bool,
/// Whether to display this as a block.
pub block: bool,
}
@ -36,8 +29,6 @@ pub struct MathToken {
/// Whether the formula is display-level, that is, it is surrounded by
/// `$[..]`.
pub display: bool,
/// Whether the closing dollars were present.
pub terminated: bool,
}
/// A unicode escape sequence token: `\u{1F5FA}`.
@ -47,15 +38,4 @@ pub struct UnicodeEscapeToken {
pub sequence: EcoString,
/// The resulting unicode character.
pub character: Option<char>,
/// Whether the closing brace was present.
pub terminated: bool,
}
/// A unit-bound number token: `1.2em`.
#[derive(Debug, Clone, PartialEq)]
pub struct UnitToken {
/// The number part.
pub number: std::ops::Range<usize>,
/// The unit part.
pub unit: std::ops::Range<usize>,
}

View File

@ -55,5 +55,5 @@ The keyword ```rust let```.
---
// Unterminated.
// Error: 2:1 expected backtick(s)
// Error: 2:1 expected 1 backtick
`endless