Added code blocks 🚟

This commit is contained in:
Martin Haug 2020-08-29 13:53:59 +02:00
parent 8a45ec2875
commit c2b6f2dc35
4 changed files with 261 additions and 7 deletions

View File

@ -3,7 +3,7 @@
use crate::style::LayoutStyle;
use crate::syntax::decoration::Decoration;
use crate::syntax::span::{Span, Spanned};
use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree};
use crate::syntax::tree::{CallExpr, SyntaxNode, SyntaxTree, CodeBlockExpr};
use crate::{DynFuture, Feedback, Pass};
use super::line::{LineContext, LineLayouter};
use super::text::{layout_text, TextContext};
@ -80,6 +80,7 @@ impl<'a> TreeLayouter<'a> {
}
SyntaxNode::Raw(lines) => self.layout_raw(lines).await,
SyntaxNode::CodeBlock(block) => self.layout_code(block).await,
SyntaxNode::Par(par) => self.layout_par(par).await,
SyntaxNode::Call(call) => {
self.layout_call(Spanned::new(call, node.span)).await;
@ -128,6 +129,21 @@ impl<'a> TreeLayouter<'a> {
self.style.text.fallback = fallback;
}
/// Lay out a code block line by line.
///
/// For the duration of the block, `"monospace"` is put at the front of the
/// text fallback list; the previous fallback configuration is restored
/// once all lines have been laid out.
async fn layout_code(&mut self, block: &CodeBlockExpr) {
    // Keep a copy of the current fallback so it can be put back afterwards.
    let previous = self.style.text.fallback.clone();

    let fallback = &mut self.style.text.fallback;
    fallback.list_mut().insert(0, "monospace".to_string());
    fallback.flatten();

    // Every source line is laid out as text and ends its own output line.
    for line in block.raw.iter() {
        self.layout_text(line).await;
        self.layouter.finish_line();
    }

    self.style.text.fallback = previous;
}
async fn layout_par(&mut self, par: &SyntaxTree) {
self.layout_tree(par).await;
self.layouter.add_secondary_spacing(

View File

@ -7,8 +7,15 @@ use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry;
use super::decoration::Decoration;
use super::span::{Pos, Span, Spanned};
use super::tokens::{is_newline_char, Token, TokenMode, Tokens};
use super::tree::{CallExpr, Expr, SyntaxNode, SyntaxTree, TableExpr};
use super::tokens::{is_newline_char, Token, TokenMode, Tokens, is_identifier};
use super::tree::{
CallExpr,
Expr,
SyntaxNode,
SyntaxTree,
TableExpr,
CodeBlockExpr,
};
use super::Ident;
/// Parse a string of source code.
@ -84,6 +91,34 @@ impl Parser<'_> {
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
}
Token::Code { lang, raw, terminated } => {
if !terminated {
error!(
@self.feedback, Span::at(token.span.end),
"expected code block to close",
);
}
let mut valid_ident = false;
let mut lang = lang.map(|s| s.map(|v| {
if is_identifier(v) {
valid_ident = true;
}
Ident(v.to_string())
}));
if !valid_ident {
if let Some(l) = lang {
error!(
@self.feedback, l.span,
"expected language to be a valid identifier",
);
}
lang = None;
}
self.with_span(SyntaxNode::CodeBlock(CodeBlockExpr { raw: unescape_code(raw), lang }))
}
Token::Text(text) => {
self.with_span(SyntaxNode::Text(text.to_string()))
}
@ -627,6 +662,84 @@ fn unescape_raw(raw: &str) -> Vec<String> {
lines
}
/// Unescape the raw contents of a code block and split them into lines.
///
/// Inside a code block, a backslash that directly follows a backtick starts
/// an escape sequence: if the backslash (optionally followed by further
/// backslashes) is followed by two backticks, the first backslash is dropped
/// and the rest is kept, so `` `\`` `` becomes three backticks. Any other
/// backslash is emitted verbatim.
///
/// Lines are split at every newline character, with `\r\n` counting as a
/// single line break. The result always contains at least one (possibly
/// empty) line.
fn unescape_code(raw: &str) -> Vec<String> {
    let mut iter = raw.chars().peekable();
    let mut line = String::new();
    let mut lines = Vec::new();

    // Length of the run of backticks directly in front of the current char.
    let mut backticks: usize = 0;

    while let Some(c) = iter.next() {
        if is_newline_char(c) {
            // Treat CRLF as one line break.
            if c == '\r' && iter.peek() == Some(&'\n') {
                iter.next();
            }
            lines.push(std::mem::take(&mut line));
            backticks = 0;
            continue;
        }

        if c == '\\' && backticks > 0 {
            // Collect what follows the backslash: further backslashes (only
            // before the first backtick) and up to two backticks.
            let mut tail = String::new();
            let mut backticks_after_slash: usize = 0;
            let mut escaped = false;

            while let Some(&s) = iter.peek() {
                match s {
                    '\\' if backticks_after_slash == 0 => tail.push(s),
                    '`' => {
                        tail.push(s);
                        backticks_after_slash += 1;
                        if backticks_after_slash == 2 {
                            escaped = true;
                            iter.next();
                            break;
                        }
                    }
                    // Anything else, including a pattern like `\`\`, fails
                    // the escape and is printed verbatim.
                    _ => break,
                }
                iter.next();
            }

            if escaped {
                // The escaping backslash itself is dropped.
                backticks = 0;
            } else {
                line.push(c);
                // The backticks consumed above may precede another escape.
                backticks = backticks_after_slash;
            }
            line.push_str(&tail);
            continue;
        }

        line.push(c);
        backticks = if c == '`' { backticks + 1 } else { 0 };
    }

    lines.push(line);
    lines
}
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
@ -652,6 +765,14 @@ mod tests {
};
}
/// Builds the language tag expected on a code block: an identifier made
/// from `text`, wrapped via `Spanned::zero`.
fn Lang(text: &str) -> Option<Spanned<Ident>> {
    let ident = Ident(text.to_string());
    Some(Spanned::zero(ident))
}
/// Builds a `SyntaxNode::CodeBlock` from a language expression followed by
/// the block's lines; each line is converted with `to_string()`.
macro_rules! C {
($lang:expr, $($line:expr),* $(,)?) => {
SyntaxNode::CodeBlock(CodeBlockExpr { raw: vec![$($line.to_string()) ,*], lang: $lang })
};
}
macro_rules! P {
($($tts:tt)*) => { SyntaxNode::Par(Tree![@$($tts)*]) };
}
@ -799,6 +920,28 @@ mod tests {
test("raw\\", vec!["raw\\"]);
}
#[test]
fn test_unescape_code() {
    // Each case pairs a raw code block body with the lines expected after
    // unescaping.
    let cases: &[(&str, &[&str])] = &[
        ("code\\`", &["code\\`"]),
        ("code`\\``", &["code```"]),
        ("code`\\`a", &["code`\\`a"]),
        ("code``hi`\\``", &["code``hi```"]),
        ("code`\\\\``", &["code`\\``"]),
        ("code`\\`\\`go", &["code`\\`\\`go"]),
        ("code`\\`\\``", &["code`\\```"]),
        ("code\ntext", &["code", "text"]),
        ("a\r\nb", &["a", "b"]),
        ("a\n\nb", &["a", "", "b"]),
        ("a\r\x0Bb", &["a", "", "b"]),
        ("a\r\n\r\nb", &["a", "", "b"]),
        ("code\\a", &["code\\a"]),
        ("code\\", &["code\\"]),
    ];

    for &(raw, expected) in cases {
        assert_eq!(unescape_code(raw), expected);
    }
}
#[test]
fn test_parse_simple_nodes() {
t!("" => );
@ -811,8 +954,19 @@ mod tests {
t!("`py`" => P![R!["py"]]);
t!("`hi\nyou" => P![R!["hi", "you"]]);
e!("`hi\nyou" => s(1,3, 1,3, "expected backtick"));
t!("`hi\\`du`" => P![R!["hi`du"]]);
t!("💜\n\n 🌍" => P![T("💜")], P![T("🌍")]);
t!("`hi\\`du`" => P![R!["hi`du"]]);
t!("```java System.out.print```" => P![
C![Lang("java"), "System.out.print"]
]);
t!("``` console.log(\n\"alert\"\n)" => P![
C![None, "console.log(", "\"alert\"", ")"]
]);
t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => P![
C![Lang("typst"), " Typst uses ``` to indicate code blocks"]
]);
e!("``` hi\nyou" => s(1,3, 1,3, "expected code block to close"));
e!("```🌍 hi\nyou```" => s(0,3, 0,4, "expected language to be a valid identifier"));
t!("💜\n\n 🌍" => P![T("💜")], P![T("🌍")]);
ts!("hi" => s(0,0, 0,2, P![s(0,0, 0,2, T("hi"))]));
ts!("*Hi*" => s(0,0, 0,4, P![

View File

@ -90,6 +90,16 @@ pub enum Token<'s> {
terminated: bool,
},
/// Multi-line code block.
Code {
/// The language of the code block, if specified.
lang: Option<Spanned<&'s str>>,
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backticks were present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
@ -127,6 +137,7 @@ impl<'s> Token<'s> {
Underscore => "underscore",
Backslash => "backslash",
Raw { .. } => "raw text",
Code { .. } => "code block",
Text(_) => "text",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
@ -241,7 +252,7 @@ impl<'s> Iterator for Tokens<'s> {
// Style toggles.
'_' if self.mode == Body => Underscore,
'`' if self.mode == Body => self.read_raw(),
'`' if self.mode == Body => self.read_raw_and_code(),
// An escaped thing.
'\\' if self.mode == Body => self.read_escaped(),
@ -330,8 +341,65 @@ impl<'s> Tokens<'s> {
Str { string, terminated }
}
fn read_raw(&mut self) -> Token<'s> {
/// Reads inline raw text or, when the opening backtick turns out to be the
/// first of three, a multi-line code block.
///
/// A code block consists of an optional language tag directly after the
/// opening backticks, followed by the content up to three *consecutive*
/// closing backticks. Whitespace between the tag and the content is skipped
/// up to and including the first line break. A backslash between a single
/// backtick and another backtick (`` `\`` ``) interrupts the run, so that
/// sequence does not close the block.
///
/// Returns `Token::Code` for code blocks and `Token::Raw` otherwise; in both
/// cases `terminated` tells whether the closing delimiter was found.
fn read_raw_and_code(&mut self) -> Token<'s> {
    let (raw, terminated) = self.read_until_unescaped('`');

    // An empty, terminated raw section directly followed by a third
    // backtick opens a code block; everything else is plain raw text.
    let is_code = raw.is_empty() && terminated && self.peek() == Some('`');
    if !is_code {
        return Raw { raw, terminated };
    }

    // Consume the third backtick.
    self.eat();

    // The language tag runs up to the first backtick or whitespace.
    let lang_start = self.pos();
    let (lang_opt, _) = self.read_string_until(
        |c| c == '`' || c.is_whitespace() || is_newline_char(c),
        false, 0, 0);
    let lang_end = self.pos();

    /// Which whitespace is already treated as part of the content.
    #[derive(Debug, PartialEq)]
    enum WhitespaceIngestion {
        /// The content has started; all whitespace belongs to it.
        All,
        /// A carriage return was skipped; only a line feed or another
        /// carriage return is still skipped.
        ExceptNewline,
        /// No content yet; leading whitespace is still being skipped.
        Never,
    }

    let mut ingest_whitespace = WhitespaceIngestion::Never;
    let mut start = self.index();
    let mut backticks = 0;
    let mut terminated = true;

    while backticks < 3 {
        match self.eat() {
            Some('`') => {
                backticks += 1;
                // A backtick is content, so whitespace trimming is over.
                ingest_whitespace = WhitespaceIngestion::All;
            }
            Some('\\') if backticks == 1 && self.peek() == Some('`') => {
                // Escaped backticks: the run does not close the block.
                backticks = 0;
            }
            Some(c) => {
                // Only three consecutive backticks close the block, so any
                // other character interrupts the current run.
                backticks = 0;

                // Remove whitespace between language and content up to the
                // first line break, dealing with CRLF and CR line endings.
                // `start` is a byte offset, so advance by the encoded width
                // of the skipped character to stay on a char boundary.
                let trimming = ingest_whitespace != WhitespaceIngestion::All;
                if trimming && c == '\n' {
                    start += c.len_utf8();
                    ingest_whitespace = WhitespaceIngestion::All;
                } else if trimming && c == '\r' {
                    start += c.len_utf8();
                    ingest_whitespace = WhitespaceIngestion::ExceptNewline;
                } else if ingest_whitespace == WhitespaceIngestion::Never
                    && c.is_whitespace()
                {
                    start += c.len_utf8();
                } else {
                    ingest_whitespace = WhitespaceIngestion::All;
                }
            }
            None => {
                terminated = false;
                break;
            }
        }
    }

    // The three closing backticks are not part of the content.
    let end = self.index() - (if terminated { 3 } else { 0 });
    let lang = if lang_opt.is_empty() {
        None
    } else {
        Some(Spanned::new(lang_opt, Span::new(lang_start, lang_end)))
    };

    Code { lang, raw: &self.src[start..end], terminated }
}
@ -494,6 +562,7 @@ mod tests {
use crate::length::Length;
use crate::syntax::tests::*;
use super::*;
use super::super::span::Spanned;
use Token::{
Space as S,
LineComment as LC, BlockComment as BC,
@ -515,6 +584,9 @@ mod tests {
fn Str(string: &str, terminated: bool) -> Token { Token::Str { string, terminated } }
fn Raw(raw: &str, terminated: bool) -> Token { Token::Raw { raw, terminated } }
/// Builds a `Token::Code`, wrapping the language (if any) via
/// `Spanned::zero`.
fn Code<'a>(lang: Option<&'a str>, raw: &'a str, terminated: bool) -> Token<'a> {
    let lang = lang.map(Spanned::zero);
    Token::Code { lang, raw, terminated }
}
macro_rules! t { ($($tts:tt)*) => {test!(@spans=false, $($tts)*)} }
macro_rules! ts { ($($tts:tt)*) => {test!(@spans=true, $($tts)*)} }
@ -568,6 +640,10 @@ mod tests {
t!(Body, "`[func]`" => Raw("[func]", true));
t!(Body, "`]" => Raw("]", false));
t!(Body, "`\\``" => Raw("\\`", true));
t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
t!(Body, "```rust hi```" => Code(Some("rust"), "hi", true));
t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
t!(Body, "```js \r\n document.write(\"go\")" => Code(Some("js"), " document.write(\"go\")", false));
t!(Body, "\\ " => Backslash, S(0));
t!(Header, "_`" => Invalid("_`"));
}

View File

@ -31,6 +31,8 @@ pub enum SyntaxNode {
Text(String),
/// Lines of raw text.
Raw(Vec<String>),
/// An optionally highlighted multi-line code block.
CodeBlock(CodeBlockExpr),
/// A paragraph of child nodes.
Par(SyntaxTree),
/// A function call.
@ -199,3 +201,9 @@ impl CallExpr {
}
}
}
/// A code block.
#[derive(Debug, Clone, PartialEq)]
pub struct CodeBlockExpr {
/// The language tag of the block, if one was specified.
pub lang: Option<Spanned<Ident>>,
/// The content of the block, one entry per line.
pub raw: Vec<String>,
}