mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Unicode escape error moved to tokenizer
This commit is contained in:
parent
c569e14c07
commit
2e7d359e59
@ -44,17 +44,13 @@ fn bench_scan(iai: &mut Iai) {
|
||||
}
|
||||
|
||||
fn bench_tokenize(iai: &mut Iai) {
|
||||
iai.run(|| {
|
||||
Tokens::new(
|
||||
black_box(&SourceFile::detached(SRC)),
|
||||
black_box(TokenMode::Markup),
|
||||
)
|
||||
.count()
|
||||
});
|
||||
let src = SourceFile::detached(SRC);
|
||||
iai.run(|| Tokens::new(black_box(&src), black_box(TokenMode::Markup)).count());
|
||||
}
|
||||
|
||||
fn bench_parse(iai: &mut Iai) {
|
||||
iai.run(|| parse(&SourceFile::detached(SRC)));
|
||||
let src = SourceFile::detached(SRC);
|
||||
iai.run(|| parse(&src));
|
||||
}
|
||||
|
||||
fn bench_eval(iai: &mut Iai) {
|
||||
|
@ -54,7 +54,10 @@ where
|
||||
while !p.eof() && f(p) {
|
||||
markup_node(p, &mut at_start);
|
||||
if let Some(node) = p.last_child() {
|
||||
at_start &= matches!(node.kind(), &NodeKind::Space(_) | &NodeKind::Parbreak | &NodeKind::LineComment | &NodeKind::BlockComment);
|
||||
at_start &= matches!(node.kind(),
|
||||
&NodeKind::Space(_) | &NodeKind::Parbreak |
|
||||
&NodeKind::LineComment | &NodeKind::BlockComment
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,22 +91,8 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
|
||||
| NodeKind::Emph
|
||||
| NodeKind::Strong
|
||||
| NodeKind::Linebreak
|
||||
| NodeKind::Raw(_) => p.eat(),
|
||||
|
||||
NodeKind::UnicodeEscape(u) => {
|
||||
if u.character.is_none() {
|
||||
let src = p.peek_src();
|
||||
p.convert(NodeKind::Error(
|
||||
ErrorPosition::Full,
|
||||
"invalid unicode escape sequence".into(),
|
||||
));
|
||||
p.start();
|
||||
p.end(NodeKind::Text(src.into()));
|
||||
return;
|
||||
}
|
||||
|
||||
p.eat();
|
||||
}
|
||||
| NodeKind::Raw(_)
|
||||
| NodeKind::UnicodeEscape(_) => p.eat(),
|
||||
|
||||
NodeKind::Eq if *at_start => heading(p),
|
||||
NodeKind::ListBullet if *at_start => list_node(p),
|
||||
@ -503,9 +492,8 @@ fn item(p: &mut Parser) -> NodeKind {
|
||||
/// Convert a collection into an array, producing errors for anything other than
|
||||
/// expressions.
|
||||
fn array(p: &mut Parser, items: usize) {
|
||||
p.start_with(items);
|
||||
p.filter_children(
|
||||
0,
|
||||
p.child_count() - items,
|
||||
|x| match x.kind() {
|
||||
NodeKind::Named | NodeKind::ParameterSink => false,
|
||||
_ => true,
|
||||
@ -522,15 +510,14 @@ fn array(p: &mut Parser, items: usize) {
|
||||
},
|
||||
);
|
||||
|
||||
p.end(NodeKind::Array)
|
||||
p.convert_with(items, NodeKind::Array);
|
||||
}
|
||||
|
||||
/// Convert a collection into a dictionary, producing errors for anything other
|
||||
/// than named pairs.
|
||||
fn dict(p: &mut Parser, items: usize) {
|
||||
p.start_with(items);
|
||||
p.filter_children(
|
||||
0,
|
||||
p.child_count() - items,
|
||||
|x| {
|
||||
x.kind() == &NodeKind::Named
|
||||
|| x.kind().is_parenthesis()
|
||||
@ -547,7 +534,7 @@ fn dict(p: &mut Parser, items: usize) {
|
||||
),
|
||||
},
|
||||
);
|
||||
p.end(NodeKind::Dict);
|
||||
p.convert_with(items, NodeKind::Dict);
|
||||
}
|
||||
|
||||
/// Convert a collection into a list of parameters, producing errors for
|
||||
@ -684,8 +671,7 @@ fn let_expr(p: &mut Parser) {
|
||||
return;
|
||||
}
|
||||
|
||||
p.start_with(p.child_count() - offset);
|
||||
p.end(NodeKind::Closure)
|
||||
p.convert_with(p.child_count() - offset, NodeKind::Closure);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,9 +186,27 @@ impl<'s> Parser<'s> {
|
||||
}
|
||||
|
||||
pub fn convert(&mut self, kind: NodeKind) {
|
||||
self.start();
|
||||
self.eat();
|
||||
self.end(kind);
|
||||
let len = self.tokens.index() - self.next_start;
|
||||
|
||||
self.children.push(
|
||||
GreenNode::with_child(
|
||||
kind,
|
||||
len,
|
||||
GreenData::new(self.next.clone().unwrap(), len),
|
||||
)
|
||||
.into(),
|
||||
);
|
||||
self.fast_forward();
|
||||
self.success = true;
|
||||
}
|
||||
|
||||
pub fn convert_with(&mut self, preserve: usize, kind: NodeKind) {
|
||||
let preserved: Vec<_> =
|
||||
self.children.drain(self.children.len() - preserve ..).collect();
|
||||
let len = preserved.iter().map(|c| c.len()).sum();
|
||||
self.children
|
||||
.push(GreenNode::with_children(kind, len, preserved).into());
|
||||
self.success = true;
|
||||
}
|
||||
|
||||
/// End the current node and undo its existence, inlining all accumulated
|
||||
|
@ -200,7 +200,7 @@ impl<'s> Tokens<'s> {
|
||||
TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
|
||||
});
|
||||
|
||||
NodeKind::Text(resolve_string(self.s.eaten_from(start)))
|
||||
NodeKind::Text(self.s.eaten_from(start).into())
|
||||
}
|
||||
|
||||
fn whitespace(&mut self) -> NodeKind {
|
||||
@ -243,10 +243,16 @@ impl<'s> Tokens<'s> {
|
||||
let sequence: EcoString = self.s.eat_while(|c| c.is_ascii_alphanumeric()).into();
|
||||
|
||||
if self.s.eat_if('}') {
|
||||
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
|
||||
character: resolve_hex(&sequence),
|
||||
sequence,
|
||||
}))
|
||||
if let Some(character) = resolve_hex(&sequence) {
|
||||
NodeKind::UnicodeEscape(UnicodeEscapeToken {
|
||||
character,
|
||||
})
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPosition::Full,
|
||||
"invalid unicode escape sequence".into(),
|
||||
)
|
||||
}
|
||||
} else {
|
||||
NodeKind::Error(
|
||||
ErrorPosition::End,
|
||||
@ -560,35 +566,21 @@ mod tests {
|
||||
use Option::None;
|
||||
use TokenMode::{Code, Markup};
|
||||
|
||||
fn UnicodeEscape(sequence: &str, terminated: bool) -> NodeKind {
|
||||
if terminated {
|
||||
NodeKind::UnicodeEscape(Rc::new(UnicodeEscapeToken {
|
||||
character: resolve_hex(sequence),
|
||||
sequence: sequence.into(),
|
||||
}))
|
||||
} else {
|
||||
NodeKind::Error(ErrorPosition::End, "expected closing brace".into())
|
||||
}
|
||||
fn UnicodeEscape(character: char) -> NodeKind {
|
||||
NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
|
||||
}
|
||||
|
||||
fn Raw(
|
||||
text: &str,
|
||||
lang: Option<&str>,
|
||||
backticks_left: u8,
|
||||
err_msg: Option<&str>,
|
||||
block: bool,
|
||||
) -> NodeKind {
|
||||
match err_msg {
|
||||
None => NodeKind::Raw(Rc::new(RawToken {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
backticks: backticks_left,
|
||||
block,
|
||||
})),
|
||||
Some(msg) => {
|
||||
NodeKind::Error(ErrorPosition::End, format!("expected {}", msg).into())
|
||||
}
|
||||
}
|
||||
fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
|
||||
NodeKind::Error(pos, message.into())
|
||||
}
|
||||
|
||||
fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
|
||||
NodeKind::Raw(Rc::new(RawToken {
|
||||
text: text.into(),
|
||||
lang: lang.map(Into::into),
|
||||
backticks: backticks_left,
|
||||
block,
|
||||
}))
|
||||
}
|
||||
|
||||
fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
|
||||
@ -795,16 +787,16 @@ mod tests {
|
||||
t!(Markup[" /"]: r#"\""# => Text(r"\"), Text("\""));
|
||||
|
||||
// Test basic unicode escapes.
|
||||
t!(Markup: r"\u{}" => UnicodeEscape("", true));
|
||||
t!(Markup: r"\u{2603}" => UnicodeEscape("2603", true));
|
||||
t!(Markup: r"\u{P}" => UnicodeEscape("P", true));
|
||||
t!(Markup: r"\u{}" => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
|
||||
t!(Markup: r"\u{2603}" => UnicodeEscape('☃'));
|
||||
t!(Markup: r"\u{P}" => Error(ErrorPosition::Full, "invalid unicode escape sequence"));
|
||||
|
||||
// Test unclosed unicode escapes.
|
||||
t!(Markup[" /"]: r"\u{" => UnicodeEscape("", false));
|
||||
t!(Markup[" /"]: r"\u{1" => UnicodeEscape("1", false));
|
||||
t!(Markup[" /"]: r"\u{26A4" => UnicodeEscape("26A4", false));
|
||||
t!(Markup[" /"]: r"\u{1Q3P" => UnicodeEscape("1Q3P", false));
|
||||
t!(Markup: r"\u{1🏕}" => UnicodeEscape("1", false), Text("🏕"), RightBrace);
|
||||
t!(Markup[" /"]: r"\u{" => Error(ErrorPosition::End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1" => Error(ErrorPosition::End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{26A4" => Error(ErrorPosition::End, "expected closing brace"));
|
||||
t!(Markup[" /"]: r"\u{1Q3P" => Error(ErrorPosition::End, "expected closing brace"));
|
||||
t!(Markup: r"\u{1🏕}" => Error(ErrorPosition::End, "expected closing brace"), Text("🏕"), RightBrace);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -894,22 +886,22 @@ mod tests {
|
||||
#[test]
|
||||
fn test_tokenize_raw_blocks() {
|
||||
// Test basic raw block.
|
||||
t!(Markup: "``" => Raw("", None, 1, None, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, 1, None, false));
|
||||
t!(Markup[""]: "`]" => Raw("]", None, 1, Some("1 backtick"), false));
|
||||
t!(Markup: "``" => Raw("", None, 1, false));
|
||||
t!(Markup: "`raw`" => Raw("raw", None, 1, false));
|
||||
t!(Markup[""]: "`]" => Error(ErrorPosition::End, "expected 1 backtick"));
|
||||
|
||||
// Test special symbols in raw block.
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, None, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, None, false), Raw(" ", None, 1, Some("1 backtick"), false));
|
||||
t!(Markup: "`[brackets]`" => Raw("[brackets]", None, 1, false));
|
||||
t!(Markup[""]: r"`\`` " => Raw(r"\", None, 1, false), Error(ErrorPosition::End, "expected 1 backtick"));
|
||||
|
||||
// Test separated closing backticks.
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, None, false));
|
||||
t!(Markup: "```not `y`e`t```" => Raw("`y`e`t", Some("not"), 3, false));
|
||||
|
||||
// Test more backticks.
|
||||
t!(Markup: "``nope``" => Raw("", None, 1, None, false), Text("nope"), Raw("", None, 1, None, false));
|
||||
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, None, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Raw("````noend", Some("👩🚀"), 5, Some("5 backticks"), false));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, None, false), Raw("", None, 1, None, false));
|
||||
t!(Markup: "``nope``" => Raw("", None, 1, false), Text("nope"), Raw("", None, 1, false));
|
||||
t!(Markup: "````🚀````" => Raw("", Some("🚀"), 4, false));
|
||||
t!(Markup[""]: "`````👩🚀````noend" => Error(ErrorPosition::End, "expected 5 backticks"));
|
||||
t!(Markup[""]: "````raw``````" => Raw("", Some("raw"), 4, false), Raw("", None, 1, false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -1,7 +1,6 @@
|
||||
use super::{Expr, Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
|
||||
use crate::node;
|
||||
use crate::util::EcoString;
|
||||
use std::fmt::Write;
|
||||
|
||||
node! {
|
||||
/// The syntactical root capable of representing a full parsed document.
|
||||
@ -50,14 +49,7 @@ impl TypedNode for MarkupNode {
|
||||
NodeKind::Strong => Some(MarkupNode::Strong),
|
||||
NodeKind::Emph => Some(MarkupNode::Emph),
|
||||
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
|
||||
NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(match u.character {
|
||||
Some(c) => c.into(),
|
||||
None => {
|
||||
let mut eco = EcoString::with_capacity(u.sequence.len() + 4);
|
||||
write!(&mut eco, "\\u{{{}}}", u.sequence).unwrap();
|
||||
eco
|
||||
}
|
||||
})),
|
||||
NodeKind::UnicodeEscape(u) => Some(MarkupNode::Text(u.character.into())),
|
||||
NodeKind::EnDash => Some(MarkupNode::Text(EcoString::from("\u{2013}"))),
|
||||
NodeKind::EmDash => Some(MarkupNode::Text(EcoString::from("\u{2014}"))),
|
||||
NodeKind::NonBreakingSpace => {
|
||||
|
@ -121,7 +121,7 @@ pub enum NodeKind {
|
||||
Text(EcoString),
|
||||
/// A backslash and the letter "u" followed by a hexadecimal unicode entity
|
||||
/// enclosed in curly braces: `\u{1F5FA}`.
|
||||
UnicodeEscape(Rc<UnicodeEscapeToken>),
|
||||
UnicodeEscape(UnicodeEscapeToken),
|
||||
/// An arbitrary number of backticks followed by inner contents, terminated
|
||||
/// with the same number of backticks: `` `...` ``.
|
||||
Raw(Rc<RawToken>),
|
||||
|
@ -33,9 +33,8 @@ pub struct MathToken {
|
||||
|
||||
/// A unicode escape sequence token: `\u{1F5FA}`.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
#[repr(transparent)]
|
||||
pub struct UnicodeEscapeToken {
|
||||
/// The escape sequence between the braces.
|
||||
pub sequence: EcoString,
|
||||
/// The resulting unicode character.
|
||||
pub character: Option<char>,
|
||||
pub character: char,
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user