mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Slim NodeKind
memory footprint
This commit is contained in:
parent
84d35efee3
commit
1c0ac793d2
@ -30,7 +30,6 @@ use std::collections::HashMap;
|
||||
use std::io;
|
||||
use std::mem;
|
||||
use std::path::PathBuf;
|
||||
use std::rc::Rc;
|
||||
|
||||
use crate::diag::{At, Error, StrResult, Trace, Tracepoint, TypResult};
|
||||
use crate::geom::{Angle, Fractional, Length, Relative};
|
||||
@ -475,7 +474,7 @@ impl Eval for ClosureExpr {
|
||||
|
||||
// Clone the body expression so that we don't have a lifetime
|
||||
// dependence on the AST.
|
||||
let body = Rc::new(self.body());
|
||||
let body = self.body().clone();
|
||||
|
||||
// Define the actual function.
|
||||
let func = Function::new(name, move |ctx, args| {
|
||||
|
@ -87,18 +87,10 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
| NodeKind::NonBreakingSpace
|
||||
| NodeKind::Emph
|
||||
| NodeKind::Strong
|
||||
| NodeKind::Linebreak => p.eat(),
|
||||
| NodeKind::Linebreak
|
||||
| NodeKind::Raw(_) => p.eat(),
|
||||
|
||||
NodeKind::UnicodeEscape(u) => {
|
||||
if !u.terminated {
|
||||
p.convert(NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
"expected closing brace".into(),
|
||||
));
|
||||
p.unsuccessful();
|
||||
return;
|
||||
}
|
||||
|
||||
if u.character.is_none() {
|
||||
let src = p.peek_src();
|
||||
p.convert(NodeKind::Error(
|
||||
@ -112,18 +104,6 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
|
||||
p.eat();
|
||||
}
|
||||
NodeKind::Raw(r) => {
|
||||
if !r.terminated {
|
||||
p.convert(NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
"expected backtick(s)".into(),
|
||||
));
|
||||
p.unsuccessful();
|
||||
return;
|
||||
}
|
||||
|
||||
p.eat();
|
||||
}
|
||||
|
||||
NodeKind::Eq if *at_start => heading(p),
|
||||
NodeKind::ListBullet if *at_start => list_node(p),
|
||||
@ -159,6 +139,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
|
||||
// Comments.
|
||||
NodeKind::LineComment | NodeKind::BlockComment => p.eat(),
|
||||
NodeKind::Error(t, e) if t != &ErrorPosition::Full || e.contains(' ') => p.eat(),
|
||||
|
||||
_ => {
|
||||
*at_start = false;
|
||||
@ -338,6 +319,10 @@ fn primary(p: &mut Parser, atomic: bool) {
|
||||
Some(NodeKind::Import) => import_expr(p),
|
||||
Some(NodeKind::Include) => include_expr(p),
|
||||
|
||||
Some(NodeKind::Error(t, e)) if t != &ErrorPosition::Full || e.contains(' ') => {
|
||||
p.eat();
|
||||
}
|
||||
|
||||
// Nothing.
|
||||
_ => {
|
||||
p.expected("expression");
|
||||
@ -363,13 +348,9 @@ fn literal(p: &mut Parser) -> bool {
|
||||
| NodeKind::Fraction(_)
|
||||
| NodeKind::Length(_, _)
|
||||
| NodeKind::Angle(_, _)
|
||||
| NodeKind::Percentage(_) => p.eat(),
|
||||
NodeKind::Str(s) => {
|
||||
p.eat();
|
||||
if !s.terminated {
|
||||
p.expected_at("quote");
|
||||
}
|
||||
}
|
||||
| NodeKind::Percentage(_)
|
||||
| NodeKind::Str(_) => p.eat(),
|
||||
|
||||
_ => return false,
|
||||
}
|
||||
|
||||
|
@ -46,12 +46,7 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
|
||||
}
|
||||
|
||||
/// Resolve the language tag and trims the raw text.
|
||||
pub fn resolve_raw(
|
||||
column: usize,
|
||||
backticks: u8,
|
||||
text: &str,
|
||||
terminated: bool,
|
||||
) -> RawToken {
|
||||
pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
|
||||
if backticks > 1 {
|
||||
let (tag, inner) = split_at_lang_tag(text);
|
||||
let (text, block) = trim_and_split_raw(column, inner);
|
||||
@ -59,7 +54,6 @@ pub fn resolve_raw(
|
||||
lang: Some(tag.into()),
|
||||
text: text.into(),
|
||||
backticks,
|
||||
terminated,
|
||||
block,
|
||||
}
|
||||
} else {
|
||||
@ -67,7 +61,6 @@ pub fn resolve_raw(
|
||||
lang: None,
|
||||
text: split_lines(text).join("\n").into(),
|
||||
backticks,
|
||||
terminated,
|
||||
block: false,
|
||||
}
|
||||
}
|
||||
@ -194,7 +187,7 @@ mod tests {
|
||||
text: &str,
|
||||
block: bool,
|
||||
) {
|
||||
let node = resolve_raw(column, backticks, raw, true);
|
||||
let node = resolve_raw(column, backticks, raw);
|
||||
assert_eq!(node.lang.as_deref(), lang);
|
||||
assert_eq!(node.text, text);
|
||||
assert_eq!(node.block, block);
|
||||
|
@ -5,6 +5,8 @@ use crate::source::SourceFile;
|
||||
use crate::syntax::*;
|
||||
use crate::util::EcoString;
|
||||
|
||||
use std::rc::Rc;
|
||||
|
||||
/// An iterator over the tokens of a string of source code.
|
||||
pub struct Tokens<'s> {
|
||||
source: &'s SourceFile,
|
||||
@ -239,11 +241,18 @@ impl<'s> Tokens<'s> {
|
||||
self.s.eat_assert('u');
|
||||
self.s.eat_assert('{');
|
||||
let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
|
||||
NodeKind::UnicodeEscape(UnicodeEscapeToken {
|
||||
|
||||
if self.s.eat_if('}') {
|
||||
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
|
||||
character: resolve_hex(&sequence),
|
||||
sequence,
|
||||
terminated: self.s.eat_if('}')
|
||||
})
|
||||
}))
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
"expected closing brace".into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
c if c.is_whitespace() => NodeKind::Linebreak,
|
||||
_ => NodeKind::Text("\\".into()),
|
||||
@ -307,13 +316,12 @@ impl<'s> Tokens<'s> {
|
||||
|
||||
// Special case for empty inline block.
|
||||
if backticks == 2 {
|
||||
return NodeKind::Raw(RawToken {
|
||||
return NodeKind::Raw(Rc::new(RawToken {
|
||||
text: EcoString::new(),
|
||||
lang: None,
|
||||
backticks: 1,
|
||||
terminated: true,
|
||||
block: false,
|
||||
});
|
||||
}));
|
||||
}
|
||||
|
||||
let start = self.s.index();
|
||||
@ -330,12 +338,26 @@ impl<'s> Tokens<'s> {
|
||||
let terminated = found == backticks;
|
||||
let end = self.s.index() - if terminated { found as usize } else { 0 };
|
||||
|
||||
NodeKind::Raw(resolve_raw(
|
||||
if terminated {
|
||||
NodeKind::Raw(Rc::new(resolve_raw(
|
||||
column,
|
||||
backticks,
|
||||
self.s.get(start .. end).into(),
|
||||
terminated,
|
||||
))
|
||||
)))
|
||||
} else {
|
||||
let remaining = backticks - found;
|
||||
let noun = if remaining == 1 { "backtick" } else { "backticks" };
|
||||
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
if found == 0 {
|
||||
format!("expected {} {}", remaining, noun)
|
||||
} else {
|
||||
format!("expected {} more {}", remaining, noun)
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn math(&mut self) -> NodeKind {
|
||||
@ -368,11 +390,22 @@ impl<'s> Tokens<'s> {
|
||||
(true, true) => 2,
|
||||
};
|
||||
|
||||
NodeKind::Math(MathToken {
|
||||
if terminated {
|
||||
NodeKind::Math(Rc::new(MathToken {
|
||||
formula: self.s.get(start .. end).into(),
|
||||
display,
|
||||
terminated,
|
||||
})
|
||||
}))
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
if display {
|
||||
"expected closing dollar sign"
|
||||
} else {
|
||||
"expected display math closure sequence"
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn ident(&mut self, start: usize) -> NodeKind {
|
||||
@ -444,17 +477,19 @@ impl<'s> Tokens<'s> {
|
||||
|
||||
fn string(&mut self) -> NodeKind {
|
||||
let mut escaped = false;
|
||||
NodeKind::Str(StrToken {
|
||||
string: resolve_string(self.s.eat_until(|c| {
|
||||
let string = resolve_string(self.s.eat_until(|c| {
|
||||
if c == '"' && !escaped {
|
||||
true
|
||||
} else {
|
||||
escaped = c == '\\' && !escaped;
|
||||
false
|
||||
}
|
||||
})),
|
||||
terminated: self.s.eat_if('"'),
|
||||
})
|
||||
}));
|
||||
if self.s.eat_if('"') {
|
||||
NodeKind::Str(StrToken { string })
|
||||
} else {
|
||||
NodeKind::Error(ErrorPosition::End, "expected quote".into())
|
||||
}
|
||||
}
|
||||
|
||||
fn line_comment(&mut self) -> NodeKind {
|
||||
@ -526,39 +561,68 @@ mod tests {
|
||||
use TokenMode::{Code, Markup};
|
||||
|
||||
fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind {
|
||||
NodeKind::UnicodeEscape(UnicodeEscapeToken {
|
||||
if terminated {
|
||||
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
|
||||
character: resolve_hex(sequence),
|
||||
sequence: sequence.into(),
|
||||
terminated,
|
||||
})
|
||||
}))
|
||||
} else {
|
||||
NodeKind::Error(ErrorPosition::End, "expected closing brace".into())
|
||||
}
|
||||
}
|
||||
|
||||
fn Raw(
|
||||
text: &str,
|
||||
lang: Option<&str>,
|
||||
backticks: u8,
|
||||
terminated: bool,
|
||||
backticks_left: u8,
|
||||
backticks_right: u8,
|
||||
block: bool,
|
||||
) -> NodeKind {
|
||||
NodeKind::Raw(RawToken {
|
||||
if backticks_left == backticks_right {
|
||||
NodeKind::Raw(Rc::new(RawToken {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
backticks,
|
||||
terminated,
|
||||
backticks: backticks_left,
|
||||
block,
|
||||
})
|
||||
}))
|
||||
} else {
|
||||
let remaining = backticks_left - backticks_right;
|
||||
let noun = if remaining == 1 { "backtick" } else { "backticks" };
|
||||
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
if backticks_right == 0 {
|
||||
format!("expected {} {}", remaining, noun)
|
||||
} else {
|
||||
format!("expected {} more {}", remaining, noun)
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn Math(formula: &str, display: bool, terminated: bool) -> NodeKind {
|
||||
NodeKind::Math(MathToken {
|
||||
formula: formula.into(),
|
||||
display,
|
||||
terminated,
|
||||
})
|
||||
if terminated {
|
||||
NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
if display {
|
||||
"expected closing dollar sign"
|
||||
} else {
|
||||
"expected display math closure sequence"
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn Str(string: &str, terminated: bool) -> NodeKind {
|
||||
NodeKind::Str(StrToken { string: string.into(), terminated })
|
||||
if terminated {
|
||||
NodeKind::Str(StrToken { string: string.into() })
|
||||
} else {
|
||||
NodeKind::Error(ErrorPosition::End, "expected quote".into())
|
||||
}
|
||||
}
|
||||
|
||||
fn Text(string: &str) -> NodeKind {
|
||||
@ -844,22 +908,22 @@ mod tests {
|
||||
#[test]
|
||||
fn test_tokenize_raw_blocks() {
|
||||
// Test basic raw block.
|
||||
t!(Markup: "``" => Raw("", None, 1, true, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, 1, true, false));
|
||||
t!(Markup[""]: "`]" => Raw("]", None, 1, false, false));
|
||||
t!(Markup: "``" => Raw("", None, 1, 1, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, 1, 1, false));
|
||||
t!(Markup[""]: "`]" => Raw("]", None, 1, 0, false));
|
||||
|
||||
// Test special symbols in raw block.
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, true, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, true, false), Raw(" ", None, 1, false, false));
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, 1, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, 1, false), Raw(" ", None, 1, 0, false));
|
||||
|
||||
// Test separated closing backticks.
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, true, false));
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, 3, false));
|
||||
|
||||
// Test more backticks.
|
||||
t!(Markup: "``nope``" => Raw("", None, 1, true, false), Text("nope"), Raw("", None, 1, true, false));
|
||||
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, true, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Raw("````noend", Some("👩🚀"), 5, false, false));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, true, false), Raw("", None, 1, true, false));
|
||||
t!(Markup: "``nope``" => Raw("", None, 1, 1, false), Text("nope"), Raw("", None, 1, 1, false));
|
||||
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, 4, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Raw("````noend", Some("👩🚀"), 5, 0, false));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, 4, false), Raw("", None, 1, 1, false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -121,12 +121,12 @@ pub enum NodeKind {
|
||||
Text(EcoString),
|
||||
/// A slash and the letter "u" followed by a hexadecimal unicode entity
|
||||
/// enclosed in curly braces: `\u{1F5FA}`.
|
||||
UnicodeEscape(UnicodeEscapeToken),
|
||||
UnicodeEscape(Rc<UnicodeEscapeToken>),
|
||||
/// An arbitrary number of backticks followed by inner contents, terminated
|
||||
/// with the same number of backticks: `` `...` ``.
|
||||
Raw(RawToken),
|
||||
Raw(Rc<RawToken>),
|
||||
/// Dollar signs surrounding inner contents.
|
||||
Math(MathToken),
|
||||
Math(Rc<MathToken>),
|
||||
/// A numbering: `23.`.
|
||||
///
|
||||
/// Can also exist without the number: `.`.
|
||||
|
@ -2,15 +2,10 @@ use crate::util::EcoString;
|
||||
|
||||
/// A quoted string token: `"..."`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[repr(transparent)]
|
||||
pub struct StrToken {
|
||||
/// The string inside the quotes.
|
||||
///
|
||||
/// _Note_: If the string contains escape sequences these are not yet
|
||||
/// applied to be able to just store a string slice here instead of
|
||||
/// a `String`. The resolving is done later in the parser.
|
||||
pub string: EcoString,
|
||||
/// Whether the closing quote was present.
|
||||
pub terminated: bool,
|
||||
}
|
||||
|
||||
/// A raw block token: `` `...` ``.
|
||||
@ -22,8 +17,6 @@ pub struct RawToken {
|
||||
pub lang: Option<EcoString>,
|
||||
/// The number of opening backticks.
|
||||
pub backticks: u8,
|
||||
/// Whether all closing backticks were present.
|
||||
pub terminated: bool,
|
||||
/// Whether to display this as a block.
|
||||
pub block: bool,
|
||||
}
|
||||
@ -36,8 +29,6 @@ pub struct MathToken {
|
||||
/// Whether the formula is display-level, that is, it is surrounded by
|
||||
/// `$[..]`.
|
||||
pub display: bool,
|
||||
/// Whether the closing dollars were present.
|
||||
pub terminated: bool,
|
||||
}
|
||||
|
||||
/// A unicode escape sequence token: `\u{1F5FA}`.
|
||||
@ -47,15 +38,4 @@ pub struct UnicodeEscapeToken {
|
||||
pub sequence: EcoString,
|
||||
/// The resulting unicode character.
|
||||
pub character: Option<char>,
|
||||
/// Whether the closing brace was present.
|
||||
pub terminated: bool,
|
||||
}
|
||||
|
||||
/// A unit-bound number token: `1.2em`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct UnitToken {
|
||||
/// The number part.
|
||||
pub number: std::ops::Range<usize>,
|
||||
/// The unit part.
|
||||
pub unit: std::ops::Range<usize>,
|
||||
}
|
||||
|
@ -55,5 +55,5 @@ The keyword ```rust let```.
|
||||
|
||||
---
|
||||
// Unterminated.
|
||||
// Error: 2:1 expected backtick(s)
|
||||
// Error: 2:1 expected 1 backtick
|
||||
`endless
|
||||
|
Loading…
x
Reference in New Issue
Block a user