mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Added Unicode Escaping for body text 👙
This commit is contained in:
parent
2a6cde7272
commit
7041e0938d
@ -110,6 +110,20 @@ impl Parser<'_> {
|
|||||||
self.with_span(SyntaxNode::Text(text.to_string()))
|
self.with_span(SyntaxNode::Text(text.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Token::UnicodeEscape(ues) => {
    // `ues` holds the hex digits of a `\u{...}` escape and comes
    // straight from user input. It may be empty (`\u{}`) or too long
    // to fit in a `u32`, so a failed parse must not panic; it is
    // reported the same way as a value that is not a valid `char`.
    let decoded = u32::from_str_radix(ues, 16)
        .ok()
        .and_then(std::char::from_u32);

    if let Some(c) = decoded {
        self.with_span(SyntaxNode::Text(c.to_string()))
    } else {
        error!(@self.feedback, token.span, "invalid unicode codepoint");
        // Still produce a (blank) text node after reporting the error.
        self.with_span(SyntaxNode::Text(String::new()))
    }
}
|
||||||
|
|
||||||
unexpected => {
|
unexpected => {
|
||||||
self.eat();
|
self.eat();
|
||||||
error!(
|
error!(
|
||||||
@ -944,6 +958,7 @@ mod tests {
|
|||||||
t!("*hi" => B, T("hi"));
|
t!("*hi" => B, T("hi"));
|
||||||
t!("hi_" => T("hi"), I);
|
t!("hi_" => T("hi"), I);
|
||||||
t!("hi you" => T("hi"), S, T("you"));
|
t!("hi you" => T("hi"), S, T("you"));
|
||||||
|
t!("\\u{1f303}" => T("🌃"));
|
||||||
t!("\n\n\nhello" => P, T("hello"));
|
t!("\n\n\nhello" => P, T("hello"));
|
||||||
t!(r"a\ b" => T("a"), L, S, T("b"));
|
t!(r"a\ b" => T("a"), L, S, T("b"));
|
||||||
t!("`py`" => R!["py"]);
|
t!("`py`" => R!["py"]);
|
||||||
@ -962,6 +977,7 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks"));
|
e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks"));
|
||||||
e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier"));
|
e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier"));
|
||||||
|
e!("\\u{d421c809}" => s(0,0, 0,12, "invalid unicode codepoint"));
|
||||||
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
|
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
|
||||||
|
|
||||||
ts!("hi" => s(0,0, 0,2, T("hi")));
|
ts!("hi" => s(0,0, 0,2, T("hi")));
|
||||||
|
@ -82,6 +82,9 @@ pub enum Token<'s> {
|
|||||||
/// A backslash followed by whitespace in text.
|
/// A backslash followed by whitespace in text.
|
||||||
Backslash,
|
Backslash,
|
||||||
|
|
||||||
|
/// A unicode escape sequence (`\u{...}`); holds only the hex digits between the braces.
|
||||||
|
UnicodeEscape(&'s str),
|
||||||
|
|
||||||
/// Raw text.
|
/// Raw text.
|
||||||
Raw {
|
Raw {
|
||||||
/// The raw text (not yet unescaped as for strings).
|
/// The raw text (not yet unescaped as for strings).
|
||||||
@ -136,6 +139,7 @@ impl<'s> Token<'s> {
|
|||||||
Star => "star",
|
Star => "star",
|
||||||
Underscore => "underscore",
|
Underscore => "underscore",
|
||||||
Backslash => "backslash",
|
Backslash => "backslash",
|
||||||
|
UnicodeEscape(_) => "unicode escape sequence",
|
||||||
Raw { .. } => "raw text",
|
Raw { .. } => "raw text",
|
||||||
Code { .. } => "code block",
|
Code { .. } => "code block",
|
||||||
Text(_) => "text",
|
Text(_) => "text",
|
||||||
@ -426,6 +430,41 @@ impl<'s> Tokens<'s> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
match self.peek() {
|
match self.peek() {
|
||||||
|
Some('u') => {
    // Position one before the `u`, i.e. where the escape sequence starts.
    let start = self.index() - 1;
    self.eat();

    if self.peek() != Some('{') {
        // A `\u` without an opening brace is plain text.
        Text("\\u")
    } else {
        self.eat();

        // Consume hex digits and, if present, a closing brace. `stop`
        // only advances past digits, so the brace is never part of the
        // resulting slice.
        let mut stop = self.index();
        let mut well_formed = true;
        loop {
            match self.peek() {
                Some('}') => {
                    self.eat();
                    break;
                }
                Some(digit) if digit.is_ascii_hexdigit() => {
                    self.eat();
                    stop = self.index();
                }
                Some(_) => {
                    well_formed = false;
                    break;
                }
                // Input ended before a closing brace; keep what we have.
                None => break,
            }
        }

        if well_formed {
            // Skip the three-character `\u{` prefix to get the digits.
            UnicodeEscape(&self.src[start + 3..stop])
        } else {
            // Only ASCII characters (`\u{` and hex digits) lie in this
            // range, so the slice covers the prefix plus the digits
            // consumed before the offending character.
            Text(&self.src[start..stop])
        }
    }
}
|
||||||
Some(c) if is_escapable(c) => {
|
Some(c) if is_escapable(c) => {
|
||||||
let index = self.index();
|
let index = self.index();
|
||||||
self.eat();
|
self.eat();
|
||||||
@ -579,6 +618,7 @@ mod tests {
|
|||||||
Plus,
|
Plus,
|
||||||
Hyphen as Min,
|
Hyphen as Min,
|
||||||
Slash,
|
Slash,
|
||||||
|
UnicodeEscape as UE,
|
||||||
Star,
|
Star,
|
||||||
Text as T,
|
Text as T,
|
||||||
};
|
};
|
||||||
@ -708,6 +748,8 @@ mod tests {
|
|||||||
t!(Body, r"\_" => T("_"));
|
t!(Body, r"\_" => T("_"));
|
||||||
t!(Body, r"\`" => T("`"));
|
t!(Body, r"\`" => T("`"));
|
||||||
t!(Body, r"\/" => T("/"));
|
t!(Body, r"\/" => T("/"));
|
||||||
|
t!(Body, r"\u{2603}" => UE("2603"));
|
||||||
|
t!(Body, r"\u{26A4" => UE("26A4"));
|
||||||
t!(Body, r#"\""# => T("\""));
|
t!(Body, r#"\""# => T("\""));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -716,6 +758,9 @@ mod tests {
|
|||||||
t!(Body, r"\a" => T("\\"), T("a"));
|
t!(Body, r"\a" => T("\\"), T("a"));
|
||||||
t!(Body, r"\:" => T(r"\"), T(":"));
|
t!(Body, r"\:" => T(r"\"), T(":"));
|
||||||
t!(Body, r"\=" => T(r"\"), T("="));
|
t!(Body, r"\=" => T(r"\"), T("="));
|
||||||
|
t!(Body, r"\u{2GA4"=> T(r"\u{2"), Text("GA4"));
|
||||||
|
t!(Body, r"\u{ " => T(r"\u{"), Space(0));
|
||||||
|
t!(Body, r"\u" => T(r"\u"));
|
||||||
t!(Header, r"\\\\" => Invalid(r"\\\\"));
|
t!(Header, r"\\\\" => Invalid(r"\\\\"));
|
||||||
t!(Header, r"\a" => Invalid(r"\a"));
|
t!(Header, r"\a" => Invalid(r"\a"));
|
||||||
t!(Header, r"\:" => Invalid(r"\"), Colon);
|
t!(Header, r"\:" => Invalid(r"\"), Colon);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user