mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Refactor raw blocks 💱
This commit is contained in:
parent
7cc279f7ae
commit
4077a7c11e
@ -5,7 +5,7 @@ use super::text::{layout_text, TextContext};
|
||||
use super::*;
|
||||
use crate::style::LayoutStyle;
|
||||
use crate::syntax::{
|
||||
CallExpr, Code, Decoration, Heading, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
|
||||
CallExpr, Decoration, Heading, Raw, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
|
||||
};
|
||||
use crate::{DynFuture, Feedback, Pass};
|
||||
|
||||
@ -83,8 +83,7 @@ impl<'a> TreeLayouter<'a> {
|
||||
|
||||
SyntaxNode::Heading(heading) => self.layout_heading(heading).await,
|
||||
|
||||
SyntaxNode::Raw(lines) => self.layout_raw(lines).await,
|
||||
SyntaxNode::Code(block) => self.layout_code(block).await,
|
||||
SyntaxNode::Raw(raw) => self.layout_raw(raw).await,
|
||||
|
||||
SyntaxNode::Call(call) => {
|
||||
self.layout_call(call.span_with(node.span)).await;
|
||||
@ -128,14 +127,18 @@ impl<'a> TreeLayouter<'a> {
|
||||
self.style.text = style;
|
||||
}
|
||||
|
||||
async fn layout_raw(&mut self, lines: &[String]) {
|
||||
async fn layout_raw(&mut self, raw: &Raw) {
|
||||
if !raw.inline {
|
||||
self.layout_parbreak();
|
||||
}
|
||||
|
||||
// TODO: Make this more efficient.
|
||||
let fallback = self.style.text.fallback.clone();
|
||||
self.style.text.fallback.list.insert(0, "monospace".to_string());
|
||||
self.style.text.fallback.flatten();
|
||||
|
||||
let mut first = true;
|
||||
for line in lines {
|
||||
for line in &raw.lines {
|
||||
if !first {
|
||||
self.layouter.finish_line();
|
||||
}
|
||||
@ -144,18 +147,10 @@ impl<'a> TreeLayouter<'a> {
|
||||
}
|
||||
|
||||
self.style.text.fallback = fallback;
|
||||
}
|
||||
|
||||
async fn layout_code(&mut self, code: &Code) {
|
||||
if code.block {
|
||||
if !raw.inline {
|
||||
self.layout_parbreak();
|
||||
}
|
||||
|
||||
self.layout_raw(&code.lines).await;
|
||||
|
||||
if code.block {
|
||||
self.layout_parbreak()
|
||||
}
|
||||
}
|
||||
|
||||
async fn layout_call(&mut self, call: Spanned<&CallExpr>) {
|
||||
|
@ -1,4 +1,5 @@
|
||||
use super::is_newline_char;
|
||||
use crate::syntax::{Ident, Raw};
|
||||
|
||||
/// Resolves all escape sequences in a string.
|
||||
pub fn unescape_string(string: &str) -> String {
|
||||
@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String {
|
||||
out
|
||||
}
|
||||
|
||||
/// Resolves all escape sequences in raw markup (between backticks) and splits it into
|
||||
/// lines.
|
||||
pub fn unescape_raw(raw: &str) -> Vec<String> {
|
||||
/// Resolves the language tag and trims the raw text.
|
||||
///
|
||||
/// Returns:
|
||||
/// - The language tag
|
||||
/// - The raw lines
|
||||
/// - Whether at least one newline was present in the untrimmed text.
|
||||
pub fn process_raw(raw: &str) -> Raw {
|
||||
let (lang, inner) = split_after_lang_tag(raw);
|
||||
let (lines, had_newline) = trim_and_split_raw(inner);
|
||||
Raw { lang, lines, inline: !had_newline }
|
||||
}
|
||||
|
||||
/// Parse the lang tag and return it alongside the remaining inner raw text.
|
||||
fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
|
||||
let mut lang = String::new();
|
||||
|
||||
let mut inner = raw;
|
||||
let mut iter = raw.chars();
|
||||
let mut text = String::new();
|
||||
|
||||
while let Some(c) = iter.next() {
|
||||
if c == '\\' {
|
||||
if let Some(c) = iter.next() {
|
||||
if c != '\\' && c != '`' {
|
||||
text.push('\\');
|
||||
}
|
||||
|
||||
text.push(c);
|
||||
} else {
|
||||
text.push('\\');
|
||||
}
|
||||
} else {
|
||||
text.push(c);
|
||||
if c == '`' || c.is_whitespace() || is_newline_char(c) {
|
||||
break;
|
||||
}
|
||||
|
||||
inner = iter.as_str();
|
||||
lang.push(c);
|
||||
}
|
||||
|
||||
split_lines(&text)
|
||||
(Ident::new(lang), inner)
|
||||
}
|
||||
|
||||
/// Resolves all escape sequences in code markup (between triple backticks) and splits it
|
||||
/// into lines.
|
||||
pub fn unescape_code(raw: &str) -> Vec<String> {
|
||||
let mut iter = raw.chars().peekable();
|
||||
let mut text = String::new();
|
||||
let mut backticks = 0u32;
|
||||
let mut update_backtick_count;
|
||||
/// Trims raw text and splits it into lines.
|
||||
///
|
||||
/// Returns whether at least one newline was contained in `raw`.
|
||||
fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
|
||||
// Trims one whitespace at end and start.
|
||||
let raw = raw.strip_prefix(' ').unwrap_or(raw);
|
||||
let raw = raw.strip_suffix(' ').unwrap_or(raw);
|
||||
|
||||
while let Some(c) = iter.next() {
|
||||
update_backtick_count = true;
|
||||
let mut lines = split_lines(raw);
|
||||
let had_newline = lines.len() > 1;
|
||||
let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
|
||||
|
||||
if c == '\\' && backticks > 0 {
|
||||
let mut tail = String::new();
|
||||
let mut escape_success = false;
|
||||
let mut backticks_after_slash = 0u32;
|
||||
|
||||
while let Some(&s) = iter.peek() {
|
||||
match s {
|
||||
'\\' => {
|
||||
if backticks_after_slash == 0 {
|
||||
tail.push('\\');
|
||||
} else {
|
||||
// Pattern like `\`\` should fail
|
||||
// escape and just be printed verbatim.
|
||||
break;
|
||||
}
|
||||
}
|
||||
'`' => {
|
||||
tail.push(s);
|
||||
backticks_after_slash += 1;
|
||||
if backticks_after_slash == 2 {
|
||||
escape_success = true;
|
||||
iter.next();
|
||||
break;
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
|
||||
iter.next();
|
||||
}
|
||||
|
||||
if !escape_success {
|
||||
text.push(c);
|
||||
backticks = backticks_after_slash;
|
||||
update_backtick_count = false;
|
||||
} else {
|
||||
backticks = 0;
|
||||
}
|
||||
|
||||
text.push_str(&tail);
|
||||
} else {
|
||||
text.push(c);
|
||||
}
|
||||
|
||||
if update_backtick_count {
|
||||
if c == '`' {
|
||||
backticks += 1;
|
||||
} else {
|
||||
backticks = 0;
|
||||
}
|
||||
}
|
||||
// Trims a sequence of whitespace followed by a newline at the start.
|
||||
if lines.first().map(is_whitespace).unwrap_or(false) {
|
||||
lines.remove(0);
|
||||
}
|
||||
|
||||
split_lines(&text)
|
||||
}
|
||||
// Trims a newline followed by a sequence of whitespace at the end.
|
||||
if lines.last().map(is_whitespace).unwrap_or(false) {
|
||||
lines.pop();
|
||||
}
|
||||
|
||||
/// Converts a hexadecimal sequence (without braces or "\u") into a character.
|
||||
pub fn hex_to_char(sequence: &str) -> Option<char> {
|
||||
u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
|
||||
(lines, had_newline)
|
||||
}
|
||||
|
||||
/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
|
||||
@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec<String> {
|
||||
lines
|
||||
}
|
||||
|
||||
/// Converts a hexadecimal sequence (without braces or "\u") into a character.
///
/// Returns `None` if `sequence` cannot be parsed as a base-16 `u32` (empty
/// string, non-hex digits, overflow) or if the parsed value is not a valid
/// Unicode scalar value (for example a surrogate or a value above `0x10FFFF`).
pub fn hex_to_char(sequence: &str) -> Option<char> {
    u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[rustfmt::skip]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn test_unescape_strings() {
|
||||
fn test(string: &str, expected: &str) {
|
||||
assert_eq!(unescape_string(string), expected.to_string());
|
||||
@ -201,43 +166,48 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn test_unescape_raws() {
|
||||
fn test(raw: &str, expected: Vec<&str>) {
|
||||
assert_eq!(unescape_raw(raw), expected);
|
||||
fn test_split_after_lang_tag() {
|
||||
fn test(raw: &str, lang: Option<&str>, inner: &str) {
|
||||
let (found_lang, found_inner) = split_after_lang_tag(raw);
|
||||
assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
|
||||
assert_eq!(found_inner, inner);
|
||||
}
|
||||
|
||||
test("typst it!", Some("typst"), " it!");
|
||||
test("typst\n it!", Some("typst"), "\n it!");
|
||||
test("typst\n it!", Some("typst"), "\n it!");
|
||||
test("abc`", Some("abc"), "`");
|
||||
test(" hi", None, " hi");
|
||||
test("`", None, "`");
|
||||
}
|
||||
|
||||
#[test]
fn test_trim_raw() {
    /// Asserts that trimming and splitting `raw` yields the `expected` lines.
    fn test(raw: &str, expected: Vec<&str>) {
        assert_eq!(trim_and_split_raw(raw).0, expected);
    }

    // Exactly one leading space is trimmed; a second one survives.
    test(" hi", vec!["hi"]);
    test("  hi", vec![" hi"]);

    // A leading whitespace-only line is dropped entirely.
    test("\nhi", vec!["hi"]);
    test(" \n hi", vec![" hi"]);

    // Exactly one trailing space is trimmed; a second one survives.
    test("hi ", vec!["hi"]);
    test("hi  ", vec!["hi "]);

    // A trailing whitespace-only line is dropped entirely.
    test("hi\n", vec!["hi"]);
    test("hi \n ", vec!["hi "]);
    test(" \n hi \n ", vec![" hi "]);
}
|
||||
|
||||
#[test]
|
||||
fn test_split_lines() {
|
||||
fn test(raw: &str, expected: Vec<&str>) {
|
||||
assert_eq!(split_lines(raw), expected);
|
||||
}
|
||||
|
||||
test("raw\\`", vec!["raw`"]);
|
||||
test("raw\\\\`", vec!["raw\\`"]);
|
||||
test("raw\ntext", vec!["raw", "text"]);
|
||||
test("a\r\nb", vec!["a", "b"]);
|
||||
test("a\n\nb", vec!["a", "", "b"]);
|
||||
test("a\r\x0Bb", vec!["a", "", "b"]);
|
||||
test("a\r\n\r\nb", vec!["a", "", "b"]);
|
||||
test("raw\\a", vec!["raw\\a"]);
|
||||
test("raw\\", vec!["raw\\"]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn test_unescape_code() {
|
||||
fn test(raw: &str, expected: Vec<&str>) {
|
||||
assert_eq!(unescape_code(raw), expected);
|
||||
}
|
||||
|
||||
test("code\\`", vec!["code\\`"]);
|
||||
test("code`\\``", vec!["code```"]);
|
||||
test("code`\\`a", vec!["code`\\`a"]);
|
||||
test("code``hi`\\``", vec!["code``hi```"]);
|
||||
test("code`\\\\``", vec!["code`\\``"]);
|
||||
test("code`\\`\\`go", vec!["code`\\`\\`go"]);
|
||||
test("code`\\`\\``", vec!["code`\\```"]);
|
||||
test("code\ntext", vec!["code", "text"]);
|
||||
test("a\r\nb", vec!["a", "b"]);
|
||||
test("a\n\nb", vec!["a", "", "b"]);
|
||||
test("a\r\x0Bb", vec!["a", "", "b"]);
|
||||
test("a\r\n\r\nb", vec!["a", "", "b"]);
|
||||
test("code\\a", vec!["code\\a"]);
|
||||
test("code\\", vec!["code\\"]);
|
||||
}
|
||||
}
|
||||
|
@ -99,35 +99,22 @@ impl Parser<'_> {
|
||||
self.parse_heading().map(SyntaxNode::Heading)
|
||||
}
|
||||
|
||||
Token::Raw { raw, terminated } => {
|
||||
Token::Raw { raw, backticks, terminated } => {
|
||||
if !terminated {
|
||||
error!(@self.feedback, end, "expected backtick");
|
||||
}
|
||||
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
|
||||
}
|
||||
|
||||
Token::Code { lang, raw, terminated } => {
|
||||
if !terminated {
|
||||
error!(@self.feedback, end, "expected backticks");
|
||||
error!(@self.feedback, end, "expected backtick(s)");
|
||||
}
|
||||
|
||||
let lang = lang.and_then(|lang| {
|
||||
if let Some(ident) = Ident::new(lang.v) {
|
||||
Some(ident.span_with(lang.span))
|
||||
} else {
|
||||
error!(@self.feedback, lang.span, "invalid identifier");
|
||||
None
|
||||
let raw = if backticks > 1 {
|
||||
process_raw(raw)
|
||||
} else {
|
||||
Raw {
|
||||
lang: None,
|
||||
lines: split_lines(raw),
|
||||
inline: true,
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
let mut lines = unescape_code(raw);
|
||||
let block = lines.len() > 1;
|
||||
|
||||
if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
|
||||
lines.pop();
|
||||
}
|
||||
|
||||
self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
|
||||
self.with_span(SyntaxNode::Raw(raw))
|
||||
}
|
||||
|
||||
Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
|
||||
|
@ -29,24 +29,17 @@ macro_rules! H {
|
||||
}
|
||||
|
||||
macro_rules! R {
|
||||
($($line:expr),* $(,)?) => {
|
||||
SyntaxNode::Raw(vec![$($line.to_string()),*])
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! C {
|
||||
($lang:expr, $($line:expr),* $(,)?) => {{
|
||||
let lines = vec![$($line.to_string()) ,*];
|
||||
SyntaxNode::Code(Code {
|
||||
($lang:expr, $inline:expr, $($line:expr),* $(,)?) => {{
|
||||
SyntaxNode::Raw(Raw {
|
||||
lang: $lang,
|
||||
block: lines.len() > 1,
|
||||
lines,
|
||||
lines: vec![$($line.to_string()) ,*],
|
||||
inline: $inline,
|
||||
})
|
||||
}};
|
||||
}
|
||||
|
||||
fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> {
|
||||
Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string())))
|
||||
fn Lang(lang: &str) -> Option<Ident> {
|
||||
Some(Ident(lang.to_string()))
|
||||
}
|
||||
|
||||
macro_rules! F {
|
||||
@ -220,19 +213,7 @@ fn test_parse_simple_nodes() {
|
||||
t!("\\u{1f303}" => T("🌃"));
|
||||
t!("\n\n\nhello" => P, T("hello"));
|
||||
t!(r"a\ b" => T("a"), L, S, T("b"));
|
||||
t!("`py`" => R!["py"]);
|
||||
t!("`hi\nyou" => R!["hi", "you"]);
|
||||
e!("`hi\nyou" => s(7, 7, "expected backtick"));
|
||||
t!("`hi\\`du`" => R!["hi`du"]);
|
||||
|
||||
ts!("```java out```" => s(0, 14, C![Lang(s(3, 7, "java")), "out"]));
|
||||
t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]);
|
||||
t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
|
||||
Lang("typst"), " Typst uses ``` to indicate code blocks"
|
||||
]);
|
||||
|
||||
e!("``` hi\nyou" => s(10, 10, "expected backticks"));
|
||||
e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
|
||||
e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence"));
|
||||
e!("\\u{abc" => s(6, 6, "expected closing brace"));
|
||||
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
|
||||
@ -242,6 +223,33 @@ fn test_parse_simple_nodes() {
|
||||
ts!("💜\n\n 🌍" => s(0, 4, T("💜")), s(4, 7, P), s(7, 11, T("🌍")));
|
||||
}
|
||||
|
||||
#[test]
fn test_parse_raw() {
    t!("`py`" => R![None, true, "py"]);
    t!("`hi\nyou" => R![None, true, "hi", "you"]);
    t!(r"`` hi\`du``" => R![None, true, r"hi\`du"]);

    // More than one backtick with optional language tag.
    t!("``` console.log(\n\"alert\"\n)" => R![None, false, "console.log(", "\"alert\"", ")"]);
    t!("````typst \r\n Typst uses ``` to indicate code blocks````!"
        => R![Lang("typst"), false, " Typst uses ``` to indicate code blocks"], T("!"));

    // Trimming of whitespace. Only a single space is stripped per side, so
    // two spaces are needed for one to remain in the output.
    t!("`` a ``" => R![None, true, "a"]);
    t!("`` a  ``" => R![None, true, "a "]);
    t!("`` ` ``" => R![None, true, "`"]);
    t!("```  `  ```" => R![None, true, " ` "]);
    t!("```  ` \n ```" => R![None, false, " ` "]);

    // Errors.
    e!("`hi\nyou" => s(7, 7, "expected backtick(s)"));
    e!("``` hi\nyou" => s(10, 10, "expected backtick(s)"));

    // TODO: Bring back when spans/errors are in place.
    // ts!("``java out``" => s(0, 12, R![Lang(s(2, 6, "java")), true, "out"]));
    // e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_comments() {
|
||||
// In body.
|
||||
@ -348,7 +356,7 @@ fn test_parse_function_bodies() {
|
||||
e!(" [val][ */]" => s(8, 10, "unexpected end of block comment"));
|
||||
|
||||
// Raw in body.
|
||||
t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]]));
|
||||
t!("[val][`Hi]`" => F!("val"; Tree![R![None, true, "Hi]"]]));
|
||||
e!("[val][`Hi]`" => s(11, 11, "expected closing bracket"));
|
||||
|
||||
// Crazy.
|
||||
|
@ -56,7 +56,7 @@ impl<'s> Tokens<'s> {
|
||||
/// The position in the string at which the last token ends and next token
|
||||
/// will start.
|
||||
pub fn pos(&self) -> Pos {
|
||||
Pos(self.index as u32)
|
||||
self.index.into()
|
||||
}
|
||||
}
|
||||
|
||||
@ -111,7 +111,7 @@ impl<'s> Iterator for Tokens<'s> {
|
||||
|
||||
// Style toggles.
|
||||
'_' if self.mode == Body => Underscore,
|
||||
'`' if self.mode == Body => self.read_raw_or_code(),
|
||||
'`' if self.mode == Body => self.read_raw(),
|
||||
|
||||
// Sections.
|
||||
'#' if self.mode == Body => Hashtag,
|
||||
@ -230,66 +230,31 @@ impl<'s> Tokens<'s> {
|
||||
Str { string, terminated }
|
||||
}
|
||||
|
||||
fn read_raw_or_code(&mut self) -> Token<'s> {
|
||||
let (raw, terminated) = self.read_until_unescaped('`');
|
||||
if raw.is_empty() && terminated && self.peek() == Some('`') {
|
||||
// Third tick found; this is a code block.
|
||||
fn read_raw(&mut self) -> Token<'s> {
|
||||
let mut backticks = 1;
|
||||
while self.peek() == Some('`') {
|
||||
self.eat();
|
||||
backticks += 1;
|
||||
}
|
||||
|
||||
// Reads the lang tag (until newline or whitespace).
|
||||
let start = self.pos();
|
||||
let (lang, _) = self.read_string_until(false, 0, 0, |c| {
|
||||
c == '`' || c.is_whitespace() || is_newline_char(c)
|
||||
});
|
||||
let end = self.pos();
|
||||
let start = self.index;
|
||||
|
||||
let lang = if !lang.is_empty() {
|
||||
Some(lang.span_with(Span::new(start, end)))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Skip to start of raw contents.
|
||||
while let Some(c) = self.peek() {
|
||||
if is_newline_char(c) {
|
||||
self.eat();
|
||||
if c == '\r' && self.peek() == Some('\n') {
|
||||
self.eat();
|
||||
}
|
||||
|
||||
break;
|
||||
} else if c.is_whitespace() {
|
||||
self.eat();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
let mut found = 0;
|
||||
while found < backticks {
|
||||
match self.eat() {
|
||||
Some('`') => found += 1,
|
||||
Some(_) => found = 0,
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
let start = self.index;
|
||||
let mut backticks = 0u32;
|
||||
let terminated = found == backticks;
|
||||
let end = self.index - if terminated { found } else { 0 };
|
||||
|
||||
while backticks < 3 {
|
||||
match self.eat() {
|
||||
Some('`') => backticks += 1,
|
||||
// Escaping of triple backticks.
|
||||
Some('\\') if backticks == 1 && self.peek() == Some('`') => {
|
||||
backticks = 0;
|
||||
}
|
||||
Some(_) => {}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
let terminated = backticks == 3;
|
||||
let end = self.index - if terminated { 3 } else { 0 };
|
||||
|
||||
Code {
|
||||
lang,
|
||||
raw: &self.src[start .. end],
|
||||
terminated,
|
||||
}
|
||||
} else {
|
||||
Raw { raw, terminated }
|
||||
Raw {
|
||||
raw: &self.src[start .. end],
|
||||
backticks,
|
||||
terminated,
|
||||
}
|
||||
}
|
||||
|
||||
@ -469,18 +434,8 @@ mod tests {
|
||||
fn Str(string: &str, terminated: bool) -> Token {
|
||||
Token::Str { string, terminated }
|
||||
}
|
||||
fn Raw(raw: &str, terminated: bool) -> Token {
|
||||
Token::Raw { raw, terminated }
|
||||
}
|
||||
fn Code<'a>(
|
||||
lang: Option<Spanned<&'a str>>,
|
||||
raw: &'a str,
|
||||
terminated: bool,
|
||||
) -> Token<'a> {
|
||||
Token::Code { lang, raw, terminated }
|
||||
}
|
||||
fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> {
|
||||
Some(Into::<Spanned<&str>>::into(lang))
|
||||
fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
|
||||
Token::Raw { raw, backticks, terminated }
|
||||
}
|
||||
fn UE(sequence: &str, terminated: bool) -> Token {
|
||||
Token::UnicodeEscape { sequence, terminated }
|
||||
@ -535,20 +490,32 @@ mod tests {
|
||||
t!(Body, "***" => Star, Star, Star);
|
||||
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
|
||||
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
||||
t!(Body, "`raw`" => Raw("raw", true));
|
||||
t!(Body, "# hi" => Hashtag, S(0), T("hi"));
|
||||
t!(Body, "#()" => Hashtag, T("()"));
|
||||
t!(Body, "`[func]`" => Raw("[func]", true));
|
||||
t!(Body, "`]" => Raw("]", false));
|
||||
t!(Body, "\\ " => Backslash, S(0));
|
||||
t!(Body, "`\\``" => Raw("\\`", true));
|
||||
t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
|
||||
t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true));
|
||||
t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
|
||||
t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false));
|
||||
t!(Header, "_`" => Invalid("_`"));
|
||||
}
|
||||
|
||||
#[test]
fn test_tokenize_raw() {
    // Basics.
    t!(Body, "`raw`" => Raw("raw", 1, true));
    t!(Body, "`[func]`" => Raw("[func]", 1, true));
    t!(Body, "`]" => Raw("]", 1, false));
    // A backslash does not escape the closing backtick.
    t!(Body, r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false));

    // Language tag. The tokenizer leaves the tag inside the raw text.
    t!(Body, "``` hi```" => Raw(" hi", 3, true));
    t!(Body, "```rust hi```" => Raw("rust hi", 3, true));
    t!(Body, r"``` hi\````" => Raw(r" hi\", 3, true), Raw("", 1, false));
    t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true));
    t!(Body, "```js \r\n document.write(\"go\")`"
        => Raw("js \r\n document.write(\"go\")`", 3, false));

    // More backticks.
    t!(Body, "`````` ``````hi" => Raw(" ", 6, true), T("hi"));
    t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true));
}
|
||||
|
||||
#[test]
|
||||
fn tokenize_header_only_tokens() {
|
||||
t!(Body, "a: b" => T("a:"), S(0), T("b"));
|
||||
|
@ -189,6 +189,12 @@ impl From<u32> for Pos {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<usize> for Pos {
|
||||
fn from(index: usize) -> Self {
|
||||
Self(index as u32)
|
||||
}
|
||||
}
|
||||
|
||||
impl Offset for Pos {
|
||||
fn offset(self, by: Self) -> Self {
|
||||
Pos(self.0 + by.0)
|
||||
|
@ -1,6 +1,5 @@
|
||||
//! Tokenization.
|
||||
|
||||
use super::span::Spanned;
|
||||
use crate::length::Length;
|
||||
|
||||
/// A minimal semantic entity of source code.
|
||||
@ -86,21 +85,13 @@ pub enum Token<'s> {
|
||||
terminated: bool,
|
||||
},
|
||||
|
||||
/// Raw text.
|
||||
/// Raw block.
|
||||
Raw {
|
||||
/// The raw text (not yet unescaped as for strings).
|
||||
/// The raw text between the backticks.
|
||||
raw: &'s str,
|
||||
/// Whether the closing backtick was present.
|
||||
terminated: bool,
|
||||
},
|
||||
|
||||
/// Multi-line code block.
|
||||
Code {
|
||||
/// The language of the code block, if specified.
|
||||
lang: Option<Spanned<&'s str>>,
|
||||
/// The raw text (not yet unescaped as for strings).
|
||||
raw: &'s str,
|
||||
/// Whether the closing backticks were present.
|
||||
/// The number of opening backticks.
|
||||
backticks: usize,
|
||||
/// Whether all closing backticks were present.
|
||||
terminated: bool,
|
||||
},
|
||||
|
||||
@ -142,8 +133,7 @@ impl<'s> Token<'s> {
|
||||
Self::Backslash => "backslash",
|
||||
Self::Hashtag => "hashtag",
|
||||
Self::UnicodeEscape { .. } => "unicode escape sequence",
|
||||
Self::Raw { .. } => "raw text",
|
||||
Self::Code { .. } => "code block",
|
||||
Self::Raw { .. } => "raw block",
|
||||
Self::Text(_) => "text",
|
||||
Self::Invalid("*/") => "end of block comment",
|
||||
Self::Invalid(_) => "invalid token",
|
||||
|
@ -31,16 +31,93 @@ pub enum SyntaxNode {
|
||||
ToggleBolder,
|
||||
/// Plain text.
|
||||
Text(String),
|
||||
/// An optionally syntax-highlighted raw block.
|
||||
Raw(Raw),
|
||||
/// Section headings.
|
||||
Heading(Heading),
|
||||
/// Lines of raw text.
|
||||
Raw(Vec<String>),
|
||||
/// An optionally highlighted (multi-line) code block.
|
||||
Code(Code),
|
||||
/// A function call.
|
||||
Call(CallExpr),
|
||||
}
|
||||
|
||||
/// A raw block, rendered in monospace with optional syntax highlighting.
|
||||
///
|
||||
/// Raw blocks start with an arbitrary number of backticks and end with the same
|
||||
/// number of backticks. If you want to include a sequence of backticks in a raw
|
||||
/// block, simply surround the block with more backticks.
|
||||
///
|
||||
/// When using at least two backticks, an optional language tag may follow
|
||||
/// directly after the backticks. This tag defines which language to
|
||||
/// syntax-highlight the text in. Apart from the language tag and some
|
||||
/// whitespace trimming discussed below, everything inside a raw block is
|
||||
/// rendered verbatim, in particular, there are no escape sequences.
|
||||
///
|
||||
/// # Examples
|
||||
/// - Raw text is surrounded by backticks.
|
||||
/// ```typst
|
||||
/// `raw`
|
||||
/// ```
|
||||
/// - An optional language tag may follow directly at the start when the block
|
||||
/// is surrounded by at least two backticks.
|
||||
/// ```typst
|
||||
/// ``rust println!("hello!")``;
|
||||
/// ```
|
||||
/// - Blocks can span multiple lines. Two backticks suffice to be able to
|
||||
/// specify the language tag, but three are fine, too.
|
||||
/// ```typst
|
||||
/// ``rust
|
||||
/// loop {
|
||||
/// find_yak().shave();
|
||||
/// }
|
||||
/// ``
|
||||
/// ```
|
||||
/// - Start with a space to omit the language tag (the space will be trimmed
|
||||
/// from the output) and use more backticks to allow backticks in the raw
|
||||
/// text.
|
||||
/// `````typst
|
||||
/// ```` This contains ```backticks``` and has no leading & trailing spaces. ````
|
||||
/// `````
|
||||
///
|
||||
/// # Trimming
|
||||
/// If we would always render the raw text between the backticks exactly as
|
||||
/// given, a few things would become problematic or even impossible:
|
||||
/// - Typical multiline code blocks (like in the example above) would have an
|
||||
/// additional newline before and after the code.
|
||||
/// - Raw text wrapped in more than one backtick could not exist without
|
||||
/// leading whitespace since the first word would be interpreted as a
|
||||
/// language tag.
|
||||
/// - A single backtick without surrounding spaces could not exist as raw text
|
||||
/// since it would be interpreted as belonging to the opening or closing
|
||||
/// backticks.
|
||||
///
|
||||
/// To fix these problems, we trim text in multi-backtick blocks as follows:
|
||||
/// - We trim a single space or a sequence of whitespace followed by a newline
|
||||
/// at the start.
|
||||
/// - We trim a single space or a newline followed by a sequence of whitespace
|
||||
/// at the end.
|
||||
///
|
||||
/// With these rules, a single raw backtick can be produced by the sequence
|
||||
/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding
|
||||
/// spaces and multiline code blocks don't have extra empty lines. Note that
|
||||
/// you can always force leading or trailing whitespace simply by adding more
|
||||
/// spaces.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Raw {
|
||||
/// An optional identifier specifying the language to syntax-highlight in.
|
||||
pub lang: Option<Ident>,
|
||||
/// The lines of raw text, determined as the raw string between the
|
||||
/// backticks trimmed according to the above rules and split at newlines.
|
||||
pub lines: Vec<String>,
|
||||
/// Whether the element can be layouted inline.
|
||||
///
|
||||
/// - When true, it will be layouted integrated within the surrounding
|
||||
/// paragraph.
|
||||
/// - When false, it will be separated into its own paragraph.
|
||||
///
|
||||
/// Single-backtick blocks are always inline-level. Multi-backtick blocks
|
||||
/// are inline-level when they contain no newlines.
|
||||
pub inline: bool,
|
||||
}
|
||||
|
||||
/// A section heading.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Heading {
|
||||
@ -49,14 +126,6 @@ pub struct Heading {
|
||||
pub tree: SyntaxTree,
|
||||
}
|
||||
|
||||
/// A code block.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Code {
|
||||
pub lang: Option<Spanned<Ident>>,
|
||||
pub lines: Vec<String>,
|
||||
pub block: bool,
|
||||
}
|
||||
|
||||
/// An expression.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub enum Expr {
|
||||
|
Loading…
x
Reference in New Issue
Block a user