diff --git a/src/layout/tree.rs b/src/layout/tree.rs index 82a91131a..24a003677 100644 --- a/src/layout/tree.rs +++ b/src/layout/tree.rs @@ -5,7 +5,7 @@ use super::text::{layout_text, TextContext}; use super::*; use crate::style::LayoutStyle; use crate::syntax::{ - CallExpr, Code, Decoration, Heading, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree, + CallExpr, Decoration, Heading, Raw, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree, }; use crate::{DynFuture, Feedback, Pass}; @@ -83,8 +83,7 @@ impl<'a> TreeLayouter<'a> { SyntaxNode::Heading(heading) => self.layout_heading(heading).await, - SyntaxNode::Raw(lines) => self.layout_raw(lines).await, - SyntaxNode::Code(block) => self.layout_code(block).await, + SyntaxNode::Raw(raw) => self.layout_raw(raw).await, SyntaxNode::Call(call) => { self.layout_call(call.span_with(node.span)).await; @@ -128,14 +127,18 @@ impl<'a> TreeLayouter<'a> { self.style.text = style; } - async fn layout_raw(&mut self, lines: &[String]) { + async fn layout_raw(&mut self, raw: &Raw) { + if !raw.inline { + self.layout_parbreak(); + } + // TODO: Make this more efficient. 
let fallback = self.style.text.fallback.clone(); self.style.text.fallback.list.insert(0, "monospace".to_string()); self.style.text.fallback.flatten(); let mut first = true; - for line in lines { + for line in &raw.lines { if !first { self.layouter.finish_line(); } @@ -144,18 +147,10 @@ impl<'a> TreeLayouter<'a> { } self.style.text.fallback = fallback; - } - async fn layout_code(&mut self, code: &Code) { - if code.block { + if !raw.inline { self.layout_parbreak(); } - - self.layout_raw(&code.lines).await; - - if code.block { - self.layout_parbreak() - } } async fn layout_call(&mut self, call: Spanned<&CallExpr>) { diff --git a/src/parse/escaping.rs b/src/parse/escaping.rs index 55b1fe67b..a2ff963b9 100644 --- a/src/parse/escaping.rs +++ b/src/parse/escaping.rs @@ -1,4 +1,5 @@ use super::is_newline_char; +use crate::syntax::{Ident, Raw}; /// Resolves all escape sequences in a string. pub fn unescape_string(string: &str) -> String { @@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String { out } -/// Resolves all escape sequences in raw markup (between backticks) and splits it into -/// into lines. -pub fn unescape_raw(raw: &str) -> Vec { +/// Resolves the language tag and trims the raw text. +/// +/// Returns: +/// - The language tag +/// - The raw lines +/// - Whether at least one newline was present in the untrimmed text. +pub fn process_raw(raw: &str) -> Raw { + let (lang, inner) = split_after_lang_tag(raw); + let (lines, had_newline) = trim_and_split_raw(inner); + Raw { lang, lines, inline: !had_newline } +} + +/// Parse the lang tag and return it alongside the remaining inner raw text. 
+fn split_after_lang_tag(raw: &str) -> (Option, &str) { + let mut lang = String::new(); + + let mut inner = raw; let mut iter = raw.chars(); - let mut text = String::new(); while let Some(c) = iter.next() { - if c == '\\' { - if let Some(c) = iter.next() { - if c != '\\' && c != '`' { - text.push('\\'); - } - - text.push(c); - } else { - text.push('\\'); - } - } else { - text.push(c); + if c == '`' || c.is_whitespace() || is_newline_char(c) { + break; } + + inner = iter.as_str(); + lang.push(c); } - split_lines(&text) + (Ident::new(lang), inner) } -/// Resolves all escape sequences in code markup (between triple backticks) and splits it -/// into into lines. -pub fn unescape_code(raw: &str) -> Vec { - let mut iter = raw.chars().peekable(); - let mut text = String::new(); - let mut backticks = 0u32; - let mut update_backtick_count; +/// Trims raw text and splits it into lines. +/// +/// Returns whether at least one newline was contained in `raw`. +fn trim_and_split_raw(raw: &str) -> (Vec, bool) { + // Trims one whitespace at end and start. + let raw = raw.strip_prefix(' ').unwrap_or(raw); + let raw = raw.strip_suffix(' ').unwrap_or(raw); - while let Some(c) = iter.next() { - update_backtick_count = true; + let mut lines = split_lines(raw); + let had_newline = lines.len() > 1; + let is_whitespace = |line: &String| line.chars().all(char::is_whitespace); - if c == '\\' && backticks > 0 { - let mut tail = String::new(); - let mut escape_success = false; - let mut backticks_after_slash = 0u32; - - while let Some(&s) = iter.peek() { - match s { - '\\' => { - if backticks_after_slash == 0 { - tail.push('\\'); - } else { - // Pattern like `\`\` should fail - // escape and just be printed verbantim. 
- break; - } - } - '`' => { - tail.push(s); - backticks_after_slash += 1; - if backticks_after_slash == 2 { - escape_success = true; - iter.next(); - break; - } - } - _ => break, - } - - iter.next(); - } - - if !escape_success { - text.push(c); - backticks = backticks_after_slash; - update_backtick_count = false; - } else { - backticks = 0; - } - - text.push_str(&tail); - } else { - text.push(c); - } - - if update_backtick_count { - if c == '`' { - backticks += 1; - } else { - backticks = 0; - } - } + // Trims a sequence of whitespace followed by a newline at the start. + if lines.first().map(is_whitespace).unwrap_or(false) { + lines.remove(0); } - split_lines(&text) -} + // Trims a newline followed by a sequence of whitespace at the end. + if lines.last().map(is_whitespace).unwrap_or(false) { + lines.pop(); + } -/// Converts a hexademical sequence (without braces or "\u") into a character. -pub fn hex_to_char(sequence: &str) -> Option { - u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) + (lines, had_newline) } /// Splits a string into a vector of lines (respecting Unicode & Windows line breaks). @@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec { lines } +/// Converts a hexadecimal sequence (without braces or "\u") into a character. 
+pub fn hex_to_char(sequence: &str) -> Option { + u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32) +} + #[cfg(test)] +#[rustfmt::skip] mod tests { use super::*; #[test] - #[rustfmt::skip] fn test_unescape_strings() { fn test(string: &str, expected: &str) { assert_eq!(unescape_string(string), expected.to_string()); @@ -201,43 +166,48 @@ mod tests { } #[test] - #[rustfmt::skip] - fn test_unescape_raws() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_raw(raw), expected); + fn test_split_after_lang_tag() { + fn test(raw: &str, lang: Option<&str>, inner: &str) { + let (found_lang, found_inner) = split_after_lang_tag(raw); + assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang); + assert_eq!(found_inner, inner); + } + + test("typst it!", Some("typst"), " it!"); + test("typst\n it!", Some("typst"), "\n it!"); + test("typst\n it!", Some("typst"), "\n it!"); + test("abc`", Some("abc"), "`"); + test(" hi", None, " hi"); + test("`", None, "`"); + } + + #[test] + fn test_trim_raw() { + fn test(raw: &str, expected: Vec<&str>) { + assert_eq!(trim_and_split_raw(raw).0, expected); + } + + test(" hi", vec!["hi"]); + test(" hi", vec![" hi"]); + test("\nhi", vec!["hi"]); + test(" \n hi", vec![" hi"]); + test("hi ", vec!["hi"]); + test("hi ", vec!["hi "]); + test("hi\n", vec!["hi"]); + test("hi \n ", vec!["hi "]); + test(" \n hi \n ", vec![" hi "]); + } + + #[test] + fn test_split_lines() { + fn test(raw: &str, expected: Vec<&str>) { + assert_eq!(split_lines(raw), expected); } - test("raw\\`", vec!["raw`"]); - test("raw\\\\`", vec!["raw\\`"]); test("raw\ntext", vec!["raw", "text"]); test("a\r\nb", vec!["a", "b"]); test("a\n\nb", vec!["a", "", "b"]); test("a\r\x0Bb", vec!["a", "", "b"]); test("a\r\n\r\nb", vec!["a", "", "b"]); - test("raw\\a", vec!["raw\\a"]); - test("raw\\", vec!["raw\\"]); - } - - #[test] - #[rustfmt::skip] - fn test_unescape_code() { - fn test(raw: &str, expected: Vec<&str>) { - assert_eq!(unescape_code(raw), expected); 
- } - - test("code\\`", vec!["code\\`"]); - test("code`\\``", vec!["code```"]); - test("code`\\`a", vec!["code`\\`a"]); - test("code``hi`\\``", vec!["code``hi```"]); - test("code`\\\\``", vec!["code`\\``"]); - test("code`\\`\\`go", vec!["code`\\`\\`go"]); - test("code`\\`\\``", vec!["code`\\```"]); - test("code\ntext", vec!["code", "text"]); - test("a\r\nb", vec!["a", "b"]); - test("a\n\nb", vec!["a", "", "b"]); - test("a\r\x0Bb", vec!["a", "", "b"]); - test("a\r\n\r\nb", vec!["a", "", "b"]); - test("code\\a", vec!["code\\a"]); - test("code\\", vec!["code\\"]); } } diff --git a/src/parse/parser.rs b/src/parse/parser.rs index bbd7ee1d2..3446af838 100644 --- a/src/parse/parser.rs +++ b/src/parse/parser.rs @@ -99,35 +99,22 @@ impl Parser<'_> { self.parse_heading().map(SyntaxNode::Heading) } - Token::Raw { raw, terminated } => { + Token::Raw { raw, backticks, terminated } => { if !terminated { - error!(@self.feedback, end, "expected backtick"); - } - self.with_span(SyntaxNode::Raw(unescape_raw(raw))) - } - - Token::Code { lang, raw, terminated } => { - if !terminated { - error!(@self.feedback, end, "expected backticks"); + error!(@self.feedback, end, "expected backtick(s)"); } - let lang = lang.and_then(|lang| { - if let Some(ident) = Ident::new(lang.v) { - Some(ident.span_with(lang.span)) - } else { - error!(@self.feedback, lang.span, "invalid identifier"); - None + let raw = if backticks > 1 { + process_raw(raw) + } else { + Raw { + lang: None, + lines: split_lines(raw), + inline: true, } - }); + }; - let mut lines = unescape_code(raw); - let block = lines.len() > 1; - - if lines.last().map(|s| s.is_empty()).unwrap_or(false) { - lines.pop(); - } - - self.with_span(SyntaxNode::Code(Code { lang, lines, block })) + self.with_span(SyntaxNode::Raw(raw)) } Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())), diff --git a/src/parse/tests.rs b/src/parse/tests.rs index d663aa2a4..8ddf013d6 100644 --- a/src/parse/tests.rs +++ b/src/parse/tests.rs @@ -29,24 
+29,17 @@ macro_rules! H { } macro_rules! R { - ($($line:expr),* $(,)?) => { - SyntaxNode::Raw(vec![$($line.to_string()),*]) - }; -} - -macro_rules! C { - ($lang:expr, $($line:expr),* $(,)?) => {{ - let lines = vec![$($line.to_string()) ,*]; - SyntaxNode::Code(Code { + ($lang:expr, $inline:expr, $($line:expr),* $(,)?) => {{ + SyntaxNode::Raw(Raw { lang: $lang, - block: lines.len() > 1, - lines, + lines: vec![$($line.to_string()) ,*], + inline: $inline, }) }}; } -fn Lang<'a, T: Into>>(lang: T) -> Option> { - Some(Into::>::into(lang).map(|s| Ident(s.to_string()))) +fn Lang(lang: &str) -> Option { + Some(Ident(lang.to_string())) } macro_rules! F { @@ -220,19 +213,7 @@ fn test_parse_simple_nodes() { t!("\\u{1f303}" => T("🌃")); t!("\n\n\nhello" => P, T("hello")); t!(r"a\ b" => T("a"), L, S, T("b")); - t!("`py`" => R!["py"]); - t!("`hi\nyou" => R!["hi", "you"]); - e!("`hi\nyou" => s(7, 7, "expected backtick")); - t!("`hi\\`du`" => R!["hi`du"]); - ts!("```java out```" => s(0, 14, C![Lang(s(3, 7, "java")), "out"])); - t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]); - t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ - Lang("typst"), " Typst uses ``` to indicate code blocks" - ]); - - e!("``` hi\nyou" => s(10, 10, "expected backticks")); - e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier")); e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence")); e!("\\u{abc" => s(6, 6, "expected closing brace")); t!("💜\n\n 🌍" => T("💜"), P, T("🌍")); @@ -242,6 +223,33 @@ fn test_parse_simple_nodes() { ts!("💜\n\n 🌍" => s(0, 4, T("💜")), s(4, 7, P), s(7, 11, T("🌍"))); } +#[test] +fn test_parse_raw() { + t!("`py`" => R![None, true, "py"]); + t!("`hi\nyou" => R![None, true, "hi", "you"]); + t!(r"`` hi\`du``" => R![None, true, r"hi\`du"]); + + // More than one backtick with optional language tag. 
+ t!("``` console.log(\n\"alert\"\n)" => R![None, false, "console.log(", "\"alert\"", ")"]); + t!("````typst \r\n Typst uses ``` to indicate code blocks````!" + => R![Lang("typst"), false, " Typst uses ``` to indicate code blocks"], T("!")); + + // Trimming of whitespace. + t!("`` a ``" => R![None, true, "a"]); + t!("`` a ``" => R![None, true, "a "]); + t!("`` ` ``" => R![None, true, "`"]); + t!("``` ` ```" => R![None, true, " ` "]); + t!("``` ` \n ```" => R![None, false, " ` "]); + + // Errors. + e!("`hi\nyou" => s(7, 7, "expected backtick(s)")); + e!("``` hi\nyou" => s(10, 10, "expected backtick(s)")); + + // TODO: Bring back when spans/errors are in place. + // ts!("``java out``" => s(0, 12, R![Lang(s(2, 6, "java")), true, "out"])); + // e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier")); +} + #[test] fn test_parse_comments() { // In body. @@ -348,7 +356,7 @@ fn test_parse_function_bodies() { e!(" [val][ */]" => s(8, 10, "unexpected end of block comment")); // Raw in body. - t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]])); + t!("[val][`Hi]`" => F!("val"; Tree![R![None, true, "Hi]"]])); e!("[val][`Hi]`" => s(11, 11, "expected closing bracket")); // Crazy. diff --git a/src/parse/tokenizer.rs b/src/parse/tokenizer.rs index 92d15edca..720bec438 100644 --- a/src/parse/tokenizer.rs +++ b/src/parse/tokenizer.rs @@ -56,7 +56,7 @@ impl<'s> Tokens<'s> { /// The position in the string at which the last token ends and next token /// will start. pub fn pos(&self) -> Pos { - Pos(self.index as u32) + self.index.into() } } @@ -111,7 +111,7 @@ impl<'s> Iterator for Tokens<'s> { // Style toggles. '_' if self.mode == Body => Underscore, - '`' if self.mode == Body => self.read_raw_or_code(), + '`' if self.mode == Body => self.read_raw(), // Sections. 
'#' if self.mode == Body => Hashtag, @@ -230,66 +230,31 @@ impl<'s> Tokens<'s> { Str { string, terminated } } - fn read_raw_or_code(&mut self) -> Token<'s> { - let (raw, terminated) = self.read_until_unescaped('`'); - if raw.is_empty() && terminated && self.peek() == Some('`') { - // Third tick found; this is a code block. + fn read_raw(&mut self) -> Token<'s> { + let mut backticks = 1; + while self.peek() == Some('`') { self.eat(); + backticks += 1; + } - // Reads the lang tag (until newline or whitespace). - let start = self.pos(); - let (lang, _) = self.read_string_until(false, 0, 0, |c| { - c == '`' || c.is_whitespace() || is_newline_char(c) - }); - let end = self.pos(); + let start = self.index; - let lang = if !lang.is_empty() { - Some(lang.span_with(Span::new(start, end))) - } else { - None - }; - - // Skip to start of raw contents. - while let Some(c) = self.peek() { - if is_newline_char(c) { - self.eat(); - if c == '\r' && self.peek() == Some('\n') { - self.eat(); - } - - break; - } else if c.is_whitespace() { - self.eat(); - } else { - break; - } + let mut found = 0; + while found < backticks { + match self.eat() { + Some('`') => found += 1, + Some(_) => found = 0, + None => break, } + } - let start = self.index; - let mut backticks = 0u32; + let terminated = found == backticks; + let end = self.index - if terminated { found } else { 0 }; - while backticks < 3 { - match self.eat() { - Some('`') => backticks += 1, - // Escaping of triple backticks. - Some('\\') if backticks == 1 && self.peek() == Some('`') => { - backticks = 0; - } - Some(_) => {} - None => break, - } - } - - let terminated = backticks == 3; - let end = self.index - if terminated { 3 } else { 0 }; - - Code { - lang, - raw: &self.src[start .. end], - terminated, - } - } else { - Raw { raw, terminated } + Raw { + raw: &self.src[start .. 
end], + backticks, + terminated, } } @@ -469,18 +434,8 @@ mod tests { fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } } - fn Raw(raw: &str, terminated: bool) -> Token { - Token::Raw { raw, terminated } - } - fn Code<'a>( - lang: Option>, - raw: &'a str, - terminated: bool, - ) -> Token<'a> { - Token::Code { lang, raw, terminated } - } - fn Lang<'a, T: Into>>(lang: T) -> Option> { - Some(Into::>::into(lang)) + fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token { + Token::Raw { raw, backticks, terminated } } fn UE(sequence: &str, terminated: bool) -> Token { Token::UnicodeEscape { sequence, terminated } @@ -535,20 +490,32 @@ mod tests { t!(Body, "***" => Star, Star, Star); t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star); t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there")); - t!(Body, "`raw`" => Raw("raw", true)); t!(Body, "# hi" => Hashtag, S(0), T("hi")); t!(Body, "#()" => Hashtag, T("()")); - t!(Body, "`[func]`" => Raw("[func]", true)); - t!(Body, "`]" => Raw("]", false)); - t!(Body, "\\ " => Backslash, S(0)); - t!(Body, "`\\``" => Raw("\\`", true)); - t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false)); - t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true)); - t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false)); - t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false)); t!(Header, "_`" => Invalid("_`")); } + #[test] + fn test_tokenize_raw() { + // Basics. + t!(Body, "`raw`" => Raw("raw", 1, true)); + t!(Body, "`[func]`" => Raw("[func]", 1, true)); + t!(Body, "`]" => Raw("]", 1, false)); + t!(Body, r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false)); + + // Language tag. 
+ t!(Body, "``` hi```" => Raw(" hi", 3, true)); + t!(Body, "```rust hi```" => Raw("rust hi", 3, true)); + t!(Body, r"``` hi\````" => Raw(r" hi\", 3, true), Raw("", 1, false)); + t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true)); + t!(Body, "```js \r\n document.write(\"go\")`" + => Raw("js \r\n document.write(\"go\")`", 3, false)); + + // More backticks. + t!(Body, "`````` ``````hi" => Raw(" ", 6, true), T("hi")); + t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true)); + } + #[test] fn tokenize_header_only_tokens() { t!(Body, "a: b" => T("a:"), S(0), T("b")); diff --git a/src/syntax/span.rs b/src/syntax/span.rs index 1bd14c654..d803eeebf 100644 --- a/src/syntax/span.rs +++ b/src/syntax/span.rs @@ -189,6 +189,12 @@ impl From for Pos { } } +impl From for Pos { + fn from(index: usize) -> Self { + Self(index as u32) + } +} + impl Offset for Pos { fn offset(self, by: Self) -> Self { Pos(self.0 + by.0) diff --git a/src/syntax/token.rs b/src/syntax/token.rs index e91a780ca..b7d4c4e20 100644 --- a/src/syntax/token.rs +++ b/src/syntax/token.rs @@ -1,6 +1,5 @@ //! Tokenization. -use super::span::Spanned; use crate::length::Length; /// A minimal semantic entity of source code. @@ -86,21 +85,13 @@ pub enum Token<'s> { terminated: bool, }, - /// Raw text. + /// Raw block. Raw { - /// The raw text (not yet unescaped as for strings). + /// The raw text between the backticks. raw: &'s str, - /// Whether the closing backtick was present. - terminated: bool, - }, - - /// Multi-line code block. - Code { - /// The language of the code block, if specified. - lang: Option>, - /// The raw text (not yet unescaped as for strings). - raw: &'s str, - /// Whether the closing backticks were present. + /// The number of opening backticks. + backticks: usize, + /// Whether all closing backticks were present. 
terminated: bool, }, @@ -142,8 +133,7 @@ impl<'s> Token<'s> { Self::Backslash => "backslash", Self::Hashtag => "hashtag", Self::UnicodeEscape { .. } => "unicode escape sequence", - Self::Raw { .. } => "raw text", - Self::Code { .. } => "code block", + Self::Raw { .. } => "raw block", Self::Text(_) => "text", Self::Invalid("*/") => "end of block comment", Self::Invalid(_) => "invalid token", diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 5327bfa44..51a7937a0 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -31,16 +31,93 @@ pub enum SyntaxNode { ToggleBolder, /// Plain text. Text(String), + /// An optionally syntax-highlighted raw block. + Raw(Raw), /// Section headings. Heading(Heading), - /// Lines of raw text. - Raw(Vec), - /// An optionally highlighted (multi-line) code block. - Code(Code), /// A function call. Call(CallExpr), } +/// A raw block, rendered in monospace with optional syntax highlighting. +/// +/// Raw blocks start with an arbitrary number of backticks and end with the same +/// number of backticks. If you want to include a sequence of backticks in a raw +/// block, simply surround the block with more backticks. +/// +/// When using at least two backticks, an optional language tag may follow +/// directly after the backticks. This tag defines which language to +/// syntax-highlight the text in. Apart from the language tag and some +/// whitespace trimming discussed below, everything inside a raw block is +/// rendered verbatim, in particular, there are no escape sequences. +/// +/// # Examples +/// - Raw text is surrounded by backticks. +/// ```typst +/// `raw` +/// ``` +/// - An optional language tag may follow directly at the start when the block +/// is surrounded by at least two backticks. +/// ```typst +/// ``rust println!("hello!")``; +/// ``` +/// - Blocks can span multiple lines. Two backticks suffice to be able to +/// specify the language tag, but three are fine, too. 
+/// ```typst +/// ``rust +/// loop { +/// find_yak().shave(); +/// } +/// `` +/// ``` +/// - Start with a space to omit the language tag (the space will be trimmed +/// from the output) and use more backticks to allow backticks in the raw +/// text. +/// `````typst +/// ```` This contains ```backticks``` and has no leading & trailing spaces. ```` +/// ````` +/// +/// # Trimming +/// If we always rendered the raw text between the backticks exactly as +/// given, a few things would become problematic or even impossible: +/// - Typical multiline code blocks (like in the example above) would have an +/// additional newline before and after the code. +/// - Raw text wrapped in more than one backtick could not exist without +/// leading whitespace since the first word would be interpreted as a +/// language tag. +/// - A single backtick without surrounding spaces could not exist as raw text +/// since it would be interpreted as belonging to the opening or closing +/// backticks. +/// +/// To fix these problems, we trim text in multi-backtick blocks as follows: +/// - We trim a single space or a sequence of whitespace followed by a newline +/// at the start. +/// - We trim a single space or a newline followed by a sequence of whitespace +/// at the end. +/// +/// With these rules, a single raw backtick can be produced by the sequence +/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding +/// spaces and multiline code blocks don't have extra empty lines. Note that +/// you can always force leading or trailing whitespace simply by adding more +/// spaces. +#[derive(Debug, Clone, PartialEq)] +pub struct Raw { + /// An optional identifier specifying the language to syntax-highlight in. + pub lang: Option, + /// The lines of raw text, determined as the raw string between the + /// backticks trimmed according to the above rules and split at newlines. + pub lines: Vec, + /// Whether the element can be laid out inline. 
+ /// + /// - When true, it will be laid out as part of the surrounding + /// paragraph. + /// - When false, it will be separated into its own paragraph. + /// + /// Single-backtick blocks are always inline-level. Multi-backtick blocks + /// are inline-level when they contain no newlines. + pub inline: bool, +} + /// A section heading. #[derive(Debug, Clone, PartialEq)] pub struct Heading { @@ -49,14 +126,6 @@ pub struct Heading { pub tree: SyntaxTree, } -/// A code block. -#[derive(Debug, Clone, PartialEq)] -pub struct Code { - pub lang: Option>, - pub lines: Vec, - pub block: bool, -} - /// An expression. #[derive(Clone, PartialEq)] pub enum Expr {