diff --git a/src/layout/tree.rs b/src/layout/tree.rs index adc179bcb..16a2930ae 100644 --- a/src/layout/tree.rs +++ b/src/layout/tree.rs @@ -3,7 +3,7 @@ use crate::style::LayoutStyle; use crate::syntax::decoration::Decoration; use crate::syntax::span::{Span, Spanned}; -use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree}; +use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree, Code}; use crate::{DynFuture, Feedback, Pass}; use super::line::{LineContext, LineLayouter}; use super::text::{layout_text, TextContext}; @@ -63,10 +63,7 @@ impl<'a> TreeLayouter<'a> { match &node.v { SyntaxNode::Spacing => self.layout_space(), SyntaxNode::Linebreak => self.layouter.finish_line(), - SyntaxNode::Parbreak => self.layouter.add_secondary_spacing( - self.style.text.paragraph_spacing(), - SpacingKind::PARAGRAPH, - ), + SyntaxNode::Parbreak => self.layout_parbreak(), SyntaxNode::ToggleItalic => { self.style.text.italic = !self.style.text.italic; @@ -84,6 +81,8 @@ impl<'a> TreeLayouter<'a> { } SyntaxNode::Raw(lines) => self.layout_raw(lines).await, + SyntaxNode::Code(block) => self.layout_code(block).await, + SyntaxNode::Call(call) => { self.layout_call(Spanned::new(call, node.span)).await; } @@ -97,6 +96,13 @@ impl<'a> TreeLayouter<'a> { ); } + fn layout_parbreak(&mut self) { + self.layouter.add_secondary_spacing( + self.style.text.paragraph_spacing(), + SpacingKind::PARAGRAPH, + ); + } + async fn layout_text(&mut self, text: &str) { self.layouter.add( layout_text( @@ -131,6 +137,18 @@ impl<'a> TreeLayouter<'a> { self.style.text.fallback = fallback; } + async fn layout_code(&mut self, code: &Code) { + if code.block { + self.layout_parbreak(); + } + + self.layout_raw(&code.lines).await; + + if code.block { + self.layout_parbreak() + } + } + async fn layout_call(&mut self, call: Spanned<&CallExpr>) { let ctx = LayoutContext { style: &self.style, diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index ea72c838a..0d12f6e18 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -8,7 +8,9 @@ use crate::compute::table::SpannedEntry; use super::decoration::Decoration; use super::span::{Pos, Span, Spanned}; use super::tokens::{is_newline_char, Token, TokenMode, Tokens}; -use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr}; +use super::tree::{ + CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr, Code, +}; use super::Ident; /// Parse a string of source code. @@ -77,6 +79,33 @@ impl Parser<'_> { self.with_span(SyntaxNode::Raw(unescape_raw(raw))) } + Token::Code { lang, raw, terminated } => { + if !terminated { + error!( + @self.feedback, Span::at(token.span.end), + "expected backticks", + ); + } + + let lang = lang.and_then(|lang| { + if let Some(ident) = Ident::new(lang.v) { + Some(Spanned::new(ident, lang.span)) + } else { + error!(@self.feedback, lang.span, "invalid identifier"); + None + } + }); + + let mut lines = unescape_code(raw); + let block = lines.len() > 1; + + if lines.last().map(|s| s.is_empty()).unwrap_or(false) { + lines.pop(); + } + + self.with_span(SyntaxNode::Code(Code { lang, lines, block })) + } + Token::Text(text) => { self.with_span(SyntaxNode::Text(text.to_string())) } @@ -589,17 +618,100 @@ fn unescape_string(string: &str) -> String { /// Unescape raw markup and split it into into lines. fn unescape_raw(raw: &str) -> Vec { let mut iter = raw.chars().peekable(); + let mut text = String::new(); + + while let Some(c) = iter.next() { + if c == '\\' { + if let Some(c) = iter.next() { + if c != '\\' && c != '`' { + text.push('\\'); + } + + text.push(c); + } else { + text.push('\\'); + } + } else { + text.push(c); + } + } + + split_lines(&text) +} + +/// Unescape raw markup and split it into into lines. +fn unescape_code(raw: &str) -> Vec { + let mut iter = raw.chars().peekable(); + let mut text = String::new(); + let mut backticks = 0u32; + let mut update_backtick_count; + + while let Some(c) = iter.next() { + update_backtick_count = true; + + if c == '\\' && backticks > 0 { + let mut tail = String::new(); + let mut escape_success = false; + let mut backticks_after_slash = 0u32; + + while let Some(&s) = iter.peek() { + match s { + '\\' => { + if backticks_after_slash == 0 { + tail.push('\\'); + } else { + // Pattern like `\`\` should fail + // escape and just be printed verbantim. + break; + } + } + '`' => { + tail.push(s); + backticks_after_slash += 1; + if backticks_after_slash == 2 { + escape_success = true; + iter.next(); + break; + } + } + _ => break, + } + + iter.next(); + } + + if !escape_success { + text.push(c); + backticks = backticks_after_slash; + update_backtick_count = false; + } else { + backticks = 0; + } + + text.push_str(&tail); + } else { + text.push(c); + } + + if update_backtick_count { + if c == '`' { + backticks += 1; + } else { + backticks = 0; + } + } + } + + split_lines(&text) +} + +fn split_lines(text: &str) -> Vec { + let mut iter = text.chars().peekable(); let mut line = String::new(); let mut lines = Vec::new(); while let Some(c) = iter.next() { - if c == '\\' { - match iter.next() { - Some('`') => line.push('`'), - Some(c) => { line.push('\\'); line.push(c); } - None => line.push('\\'), - } - } else if is_newline_char(c) { + if is_newline_char(c) { if c == '\r' && iter.peek() == Some(&'\n') { iter.next(); } @@ -640,6 +752,25 @@ mod tests { }; } + macro_rules! C { + (None, $($line:expr),* $(,)?) => {{ + let lines = vec![$($line.to_string()) ,*]; + SyntaxNode::Code(Code { + lang: None, + block: lines.len() > 1, + lines, + }) + }}; + (Some($lang:expr), $($line:expr),* $(,)?) => {{ + let lines = vec![$($line.to_string()) ,*]; + SyntaxNode::Code(Code { + lang: Some(Into::>::into($lang).map(|s| Ident(s.to_string()))), + block: lines.len() > 1, + lines, + }) + }}; + } + macro_rules! F { ($($tts:tt)*) => { SyntaxNode::Call(Call!(@$($tts)*)) } } @@ -774,6 +905,7 @@ mod tests { } test("raw\\`", vec!["raw`"]); + test("raw\\\\`", vec!["raw\\`"]); test("raw\ntext", vec!["raw", "text"]); test("a\r\nb", vec!["a", "b"]); test("a\n\nb", vec!["a", "", "b"]); @@ -783,6 +915,28 @@ mod tests { test("raw\\", vec!["raw\\"]); } + #[test] + fn test_unescape_code() { + fn test(raw: &str, expected: Vec<&str>) { + assert_eq!(unescape_code(raw), expected); + } + + test("code\\`", vec!["code\\`"]); + test("code`\\``", vec!["code```"]); + test("code`\\`a", vec!["code`\\`a"]); + test("code``hi`\\``", vec!["code``hi```"]); + test("code`\\\\``", vec!["code`\\``"]); + test("code`\\`\\`go", vec!["code`\\`\\`go"]); + test("code`\\`\\``", vec!["code`\\```"]); + test("code\ntext", vec!["code", "text"]); + test("a\r\nb", vec!["a", "b"]); + test("a\n\nb", vec!["a", "", "b"]); + test("a\r\x0Bb", vec!["a", "", "b"]); + test("a\r\n\r\nb", vec!["a", "", "b"]); + test("code\\a", vec!["code\\a"]); + test("code\\", vec!["code\\"]); + } + #[test] fn test_parse_simple_nodes() { t!("" => ); @@ -797,6 +951,19 @@ mod tests { e!("`hi\nyou" => s(1,3, 1,3, "expected backtick")); t!("`hi\\`du`" => R!["hi`du"]); + t!("```java System.out.print```" => C![ + Some("java"), "System.out.print" + ]); + t!("``` console.log(\n\"alert\"\n)" => C![ + None, "console.log(", "\"alert\"", ")" + ]); + t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![ + Some("typst"), " Typst uses ``` to indicate code blocks" + ]); + e!("``` hi\nyou" => s(1,3, 1,3, "expected backticks")); + e!("```🌍 hi\nyou```" => s(0,3, 0,4, "invalid identifier")); + t!("💜\n\n 🌍" => T("💜"), P, T("🌍")); + ts!("hi" => s(0,0, 0,2, T("hi"))); ts!("*Hi*" => s(0,0, 0,1, B), s(0,1, 0,3, T("Hi")), s(0,3, 0,4, B)); ts!("💜\n\n 🌍" => s(0,0, 0,1, T("💜")), s(0,1, 2,1, P), s(2,1, 2,2, T("🌍"))); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index 1dcf9022e..7ecb05fe4 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -90,6 +90,16 @@ pub enum Token<'s> { terminated: bool, }, + /// Multi-line code block. + Code { + /// The language of the code block, if specified. + lang: Option>, + /// The raw text (not yet unescaped as for strings). + raw: &'s str, + /// Whether the closing backticks were present. + terminated: bool, + }, + /// Any other consecutive string. Text(&'s str), @@ -127,6 +137,7 @@ impl<'s> Token<'s> { Underscore => "underscore", Backslash => "backslash", Raw { .. } => "raw text", + Code { .. } => "code block", Text(_) => "text", Invalid("*/") => "end of block comment", Invalid(_) => "invalid token", @@ -241,7 +252,7 @@ impl<'s> Iterator for Tokens<'s> { // Style toggles. '_' if self.mode == Body => Underscore, - '`' if self.mode == Body => self.read_raw(), + '`' if self.mode == Body => self.read_raw_or_code(), // An escaped thing. '\\' if self.mode == Body => self.read_escaped(), @@ -330,9 +341,67 @@ impl<'s> Tokens<'s> { Str { string, terminated } } - fn read_raw(&mut self) -> Token<'s> { + fn read_raw_or_code(&mut self) -> Token<'s> { let (raw, terminated) = self.read_until_unescaped('`'); - Raw { raw, terminated } + if raw.is_empty() && terminated && self.peek() == Some('`') { + // Third tick found; this is a code block. + self.eat(); + + // Reads the lang tag (until newline or whitespace). + let start = self.pos(); + let lang = self.read_string_until( + |c| c == '`' || c.is_whitespace() || is_newline_char(c), + false, 0, 0, + ).0; + let end = self.pos(); + let lang = if !lang.is_empty() { + Some(Spanned::new(lang, Span::new(start, end))) + } else { + None + }; + + // Skip to start of raw contents. + while let Some(c) = self.peek() { + if is_newline_char(c) { + self.eat(); + if c == '\r' && self.peek() == Some('\n') { + self.eat(); + } + + break; + } else if c.is_whitespace() { + self.eat(); + } else { + break; + } + } + + let start = self.index(); + let mut backticks = 0u32; + + while backticks < 3 { + match self.eat() { + Some('`') => backticks += 1, + // Escaping of triple backticks. + Some('\\') if backticks == 1 && self.peek() == Some('`') => { + backticks = 0; + } + Some(_) => {} + None => break, + } + } + + let terminated = backticks == 3; + let end = self.index() - if terminated { 3 } else { 0 }; + + Code { + lang, + raw: &self.src[start..end], + terminated + } + } else { + Raw { raw, terminated } + } } fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) { @@ -494,6 +563,7 @@ mod tests { use crate::length::Length; use crate::syntax::tests::*; use super::*; + use super::super::span::Spanned; use Token::{ Space as S, LineComment as LC, BlockComment as BC, @@ -515,6 +585,9 @@ mod tests { fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } } fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } } + fn Code<'a>(lang: Option<&'a str>, raw: &'a str, terminated: bool) -> Token<'a> { + Token::Code { lang: lang.map(Spanned::zero), raw, terminated } + } macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} } macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} } @@ -568,6 +641,10 @@ mod tests { t!(Body, "`[func]`" => Raw("[func]", true)); t!(Body, "`]" => Raw("]", false)); t!(Body, "`\\``" => Raw("\\`", true)); + t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false)); + t!(Body, "```rust hi```" => Code(Some("rust"), "hi", true)); + t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false)); + t!(Body, "```js \r\n document.write(\"go\")" => Code(Some("js"), " document.write(\"go\")", false)); t!(Body, "\\ " => Backslash, S(0)); t!(Header, "_`" => Invalid("_`")); } diff --git a/src/syntax/tree.rs b/src/syntax/tree.rs index 31f334d2b..44acd0234 100644 --- a/src/syntax/tree.rs +++ b/src/syntax/tree.rs @@ -33,6 +33,8 @@ pub enum SyntaxNode { Text(String), /// Lines of raw text. Raw(Vec), + /// An optionally highlighted (multi-line) code block. + Code(Code), /// A function call. Call(CallExpr), } @@ -199,3 +201,10 @@ impl CallExpr { } } } +/// A code block. +#[derive(Debug, Clone, PartialEq)] +pub struct Code { + pub lang: Option>, + pub lines: Vec, + pub block: bool, +}