mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Refactor raw blocks 💱
This commit is contained in:
parent
7cc279f7ae
commit
4077a7c11e
@ -5,7 +5,7 @@ use super::text::{layout_text, TextContext};
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::style::LayoutStyle;
|
use crate::style::LayoutStyle;
|
||||||
use crate::syntax::{
|
use crate::syntax::{
|
||||||
CallExpr, Code, Decoration, Heading, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
|
CallExpr, Decoration, Heading, Raw, Span, SpanWith, Spanned, SyntaxNode, SyntaxTree,
|
||||||
};
|
};
|
||||||
use crate::{DynFuture, Feedback, Pass};
|
use crate::{DynFuture, Feedback, Pass};
|
||||||
|
|
||||||
@ -83,8 +83,7 @@ impl<'a> TreeLayouter<'a> {
|
|||||||
|
|
||||||
SyntaxNode::Heading(heading) => self.layout_heading(heading).await,
|
SyntaxNode::Heading(heading) => self.layout_heading(heading).await,
|
||||||
|
|
||||||
SyntaxNode::Raw(lines) => self.layout_raw(lines).await,
|
SyntaxNode::Raw(raw) => self.layout_raw(raw).await,
|
||||||
SyntaxNode::Code(block) => self.layout_code(block).await,
|
|
||||||
|
|
||||||
SyntaxNode::Call(call) => {
|
SyntaxNode::Call(call) => {
|
||||||
self.layout_call(call.span_with(node.span)).await;
|
self.layout_call(call.span_with(node.span)).await;
|
||||||
@ -128,14 +127,18 @@ impl<'a> TreeLayouter<'a> {
|
|||||||
self.style.text = style;
|
self.style.text = style;
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn layout_raw(&mut self, lines: &[String]) {
|
async fn layout_raw(&mut self, raw: &Raw) {
|
||||||
|
if !raw.inline {
|
||||||
|
self.layout_parbreak();
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Make this more efficient.
|
// TODO: Make this more efficient.
|
||||||
let fallback = self.style.text.fallback.clone();
|
let fallback = self.style.text.fallback.clone();
|
||||||
self.style.text.fallback.list.insert(0, "monospace".to_string());
|
self.style.text.fallback.list.insert(0, "monospace".to_string());
|
||||||
self.style.text.fallback.flatten();
|
self.style.text.fallback.flatten();
|
||||||
|
|
||||||
let mut first = true;
|
let mut first = true;
|
||||||
for line in lines {
|
for line in &raw.lines {
|
||||||
if !first {
|
if !first {
|
||||||
self.layouter.finish_line();
|
self.layouter.finish_line();
|
||||||
}
|
}
|
||||||
@ -144,18 +147,10 @@ impl<'a> TreeLayouter<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
self.style.text.fallback = fallback;
|
self.style.text.fallback = fallback;
|
||||||
}
|
|
||||||
|
|
||||||
async fn layout_code(&mut self, code: &Code) {
|
if !raw.inline {
|
||||||
if code.block {
|
|
||||||
self.layout_parbreak();
|
self.layout_parbreak();
|
||||||
}
|
}
|
||||||
|
|
||||||
self.layout_raw(&code.lines).await;
|
|
||||||
|
|
||||||
if code.block {
|
|
||||||
self.layout_parbreak()
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn layout_call(&mut self, call: Spanned<&CallExpr>) {
|
async fn layout_call(&mut self, call: Spanned<&CallExpr>) {
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use super::is_newline_char;
|
use super::is_newline_char;
|
||||||
|
use crate::syntax::{Ident, Raw};
|
||||||
|
|
||||||
/// Resolves all escape sequences in a string.
|
/// Resolves all escape sequences in a string.
|
||||||
pub fn unescape_string(string: &str) -> String {
|
pub fn unescape_string(string: &str) -> String {
|
||||||
@ -56,101 +57,60 @@ pub fn unescape_string(string: &str) -> String {
|
|||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolves all escape sequences in raw markup (between backticks) and splits it into
|
/// Resolves the language tag and trims the raw text.
|
||||||
/// lines.
|
///
|
||||||
pub fn unescape_raw(raw: &str) -> Vec<String> {
|
/// Returns:
|
||||||
|
/// - The language tag
|
||||||
|
/// - The raw lines
|
||||||
|
/// - Whether at least one newline was present in the untrimmed text.
|
||||||
|
pub fn process_raw(raw: &str) -> Raw {
|
||||||
|
let (lang, inner) = split_after_lang_tag(raw);
|
||||||
|
let (lines, had_newline) = trim_and_split_raw(inner);
|
||||||
|
Raw { lang, lines, inline: !had_newline }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the lang tag and return it alongside the remaining inner raw text.
|
||||||
|
fn split_after_lang_tag(raw: &str) -> (Option<Ident>, &str) {
|
||||||
|
let mut lang = String::new();
|
||||||
|
|
||||||
|
let mut inner = raw;
|
||||||
let mut iter = raw.chars();
|
let mut iter = raw.chars();
|
||||||
let mut text = String::new();
|
|
||||||
|
|
||||||
while let Some(c) = iter.next() {
|
while let Some(c) = iter.next() {
|
||||||
if c == '\\' {
|
if c == '`' || c.is_whitespace() || is_newline_char(c) {
|
||||||
if let Some(c) = iter.next() {
|
break;
|
||||||
if c != '\\' && c != '`' {
|
|
||||||
text.push('\\');
|
|
||||||
}
|
|
||||||
|
|
||||||
text.push(c);
|
|
||||||
} else {
|
|
||||||
text.push('\\');
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
text.push(c);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inner = iter.as_str();
|
||||||
|
lang.push(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
split_lines(&text)
|
(Ident::new(lang), inner)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Resolves all escape sequences in code markup (between triple backticks) and splits it
|
/// Trims raw text and splits it into lines.
|
||||||
/// into lines.
|
///
|
||||||
pub fn unescape_code(raw: &str) -> Vec<String> {
|
/// Returns whether at least one newline was contained in `raw`.
|
||||||
let mut iter = raw.chars().peekable();
|
fn trim_and_split_raw(raw: &str) -> (Vec<String>, bool) {
|
||||||
let mut text = String::new();
|
// Trims a single space at the start and at the end.
|
||||||
let mut backticks = 0u32;
|
let raw = raw.strip_prefix(' ').unwrap_or(raw);
|
||||||
let mut update_backtick_count;
|
let raw = raw.strip_suffix(' ').unwrap_or(raw);
|
||||||
|
|
||||||
while let Some(c) = iter.next() {
|
let mut lines = split_lines(raw);
|
||||||
update_backtick_count = true;
|
let had_newline = lines.len() > 1;
|
||||||
|
let is_whitespace = |line: &String| line.chars().all(char::is_whitespace);
|
||||||
|
|
||||||
if c == '\\' && backticks > 0 {
|
// Trims a sequence of whitespace followed by a newline at the start.
|
||||||
let mut tail = String::new();
|
if lines.first().map(is_whitespace).unwrap_or(false) {
|
||||||
let mut escape_success = false;
|
lines.remove(0);
|
||||||
let mut backticks_after_slash = 0u32;
|
|
||||||
|
|
||||||
while let Some(&s) = iter.peek() {
|
|
||||||
match s {
|
|
||||||
'\\' => {
|
|
||||||
if backticks_after_slash == 0 {
|
|
||||||
tail.push('\\');
|
|
||||||
} else {
|
|
||||||
// Pattern like `\`\` should fail
|
|
||||||
// escape and just be printed verbatim.
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
'`' => {
|
|
||||||
tail.push(s);
|
|
||||||
backticks_after_slash += 1;
|
|
||||||
if backticks_after_slash == 2 {
|
|
||||||
escape_success = true;
|
|
||||||
iter.next();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
|
|
||||||
iter.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
if !escape_success {
|
|
||||||
text.push(c);
|
|
||||||
backticks = backticks_after_slash;
|
|
||||||
update_backtick_count = false;
|
|
||||||
} else {
|
|
||||||
backticks = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
text.push_str(&tail);
|
|
||||||
} else {
|
|
||||||
text.push(c);
|
|
||||||
}
|
|
||||||
|
|
||||||
if update_backtick_count {
|
|
||||||
if c == '`' {
|
|
||||||
backticks += 1;
|
|
||||||
} else {
|
|
||||||
backticks = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
split_lines(&text)
|
// Trims a newline followed by a sequence of whitespace at the end.
|
||||||
}
|
if lines.last().map(is_whitespace).unwrap_or(false) {
|
||||||
|
lines.pop();
|
||||||
|
}
|
||||||
|
|
||||||
/// Converts a hexadecimal sequence (without braces or "\u") into a character.
|
(lines, had_newline)
|
||||||
pub fn hex_to_char(sequence: &str) -> Option<char> {
|
|
||||||
u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
|
/// Splits a string into a vector of lines (respecting Unicode & Windows line breaks).
|
||||||
@ -175,12 +135,17 @@ pub fn split_lines(text: &str) -> Vec<String> {
|
|||||||
lines
|
lines
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a hexadecimal sequence (without braces or "\u") into a character.
|
||||||
|
pub fn hex_to_char(sequence: &str) -> Option<char> {
|
||||||
|
u32::from_str_radix(sequence, 16).ok().and_then(std::char::from_u32)
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
#[rustfmt::skip]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[rustfmt::skip]
|
|
||||||
fn test_unescape_strings() {
|
fn test_unescape_strings() {
|
||||||
fn test(string: &str, expected: &str) {
|
fn test(string: &str, expected: &str) {
|
||||||
assert_eq!(unescape_string(string), expected.to_string());
|
assert_eq!(unescape_string(string), expected.to_string());
|
||||||
@ -201,43 +166,48 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
#[rustfmt::skip]
|
fn test_split_after_lang_tag() {
|
||||||
fn test_unescape_raws() {
|
fn test(raw: &str, lang: Option<&str>, inner: &str) {
|
||||||
fn test(raw: &str, expected: Vec<&str>) {
|
let (found_lang, found_inner) = split_after_lang_tag(raw);
|
||||||
assert_eq!(unescape_raw(raw), expected);
|
assert_eq!(found_lang.as_ref().map(|id| id.as_str()), lang);
|
||||||
|
assert_eq!(found_inner, inner);
|
||||||
|
}
|
||||||
|
|
||||||
|
test("typst it!", Some("typst"), " it!");
|
||||||
|
test("typst\n it!", Some("typst"), "\n it!");
|
||||||
|
test("typst\n it!", Some("typst"), "\n it!");
|
||||||
|
test("abc`", Some("abc"), "`");
|
||||||
|
test(" hi", None, " hi");
|
||||||
|
test("`", None, "`");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_trim_raw() {
|
||||||
|
fn test(raw: &str, expected: Vec<&str>) {
|
||||||
|
assert_eq!(trim_and_split_raw(raw).0, expected);
|
||||||
|
}
|
||||||
|
|
||||||
|
test(" hi", vec!["hi"]);
|
||||||
|
test(" hi", vec![" hi"]);
|
||||||
|
test("\nhi", vec!["hi"]);
|
||||||
|
test(" \n hi", vec![" hi"]);
|
||||||
|
test("hi ", vec!["hi"]);
|
||||||
|
test("hi ", vec!["hi "]);
|
||||||
|
test("hi\n", vec!["hi"]);
|
||||||
|
test("hi \n ", vec!["hi "]);
|
||||||
|
test(" \n hi \n ", vec![" hi "]);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_split_lines() {
|
||||||
|
fn test(raw: &str, expected: Vec<&str>) {
|
||||||
|
assert_eq!(split_lines(raw), expected);
|
||||||
}
|
}
|
||||||
|
|
||||||
test("raw\\`", vec!["raw`"]);
|
|
||||||
test("raw\\\\`", vec!["raw\\`"]);
|
|
||||||
test("raw\ntext", vec!["raw", "text"]);
|
test("raw\ntext", vec!["raw", "text"]);
|
||||||
test("a\r\nb", vec!["a", "b"]);
|
test("a\r\nb", vec!["a", "b"]);
|
||||||
test("a\n\nb", vec!["a", "", "b"]);
|
test("a\n\nb", vec!["a", "", "b"]);
|
||||||
test("a\r\x0Bb", vec!["a", "", "b"]);
|
test("a\r\x0Bb", vec!["a", "", "b"]);
|
||||||
test("a\r\n\r\nb", vec!["a", "", "b"]);
|
test("a\r\n\r\nb", vec!["a", "", "b"]);
|
||||||
test("raw\\a", vec!["raw\\a"]);
|
|
||||||
test("raw\\", vec!["raw\\"]);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
#[rustfmt::skip]
|
|
||||||
fn test_unescape_code() {
|
|
||||||
fn test(raw: &str, expected: Vec<&str>) {
|
|
||||||
assert_eq!(unescape_code(raw), expected);
|
|
||||||
}
|
|
||||||
|
|
||||||
test("code\\`", vec!["code\\`"]);
|
|
||||||
test("code`\\``", vec!["code```"]);
|
|
||||||
test("code`\\`a", vec!["code`\\`a"]);
|
|
||||||
test("code``hi`\\``", vec!["code``hi```"]);
|
|
||||||
test("code`\\\\``", vec!["code`\\``"]);
|
|
||||||
test("code`\\`\\`go", vec!["code`\\`\\`go"]);
|
|
||||||
test("code`\\`\\``", vec!["code`\\```"]);
|
|
||||||
test("code\ntext", vec!["code", "text"]);
|
|
||||||
test("a\r\nb", vec!["a", "b"]);
|
|
||||||
test("a\n\nb", vec!["a", "", "b"]);
|
|
||||||
test("a\r\x0Bb", vec!["a", "", "b"]);
|
|
||||||
test("a\r\n\r\nb", vec!["a", "", "b"]);
|
|
||||||
test("code\\a", vec!["code\\a"]);
|
|
||||||
test("code\\", vec!["code\\"]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -99,35 +99,22 @@ impl Parser<'_> {
|
|||||||
self.parse_heading().map(SyntaxNode::Heading)
|
self.parse_heading().map(SyntaxNode::Heading)
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Raw { raw, terminated } => {
|
Token::Raw { raw, backticks, terminated } => {
|
||||||
if !terminated {
|
if !terminated {
|
||||||
error!(@self.feedback, end, "expected backtick");
|
error!(@self.feedback, end, "expected backtick(s)");
|
||||||
}
|
|
||||||
self.with_span(SyntaxNode::Raw(unescape_raw(raw)))
|
|
||||||
}
|
|
||||||
|
|
||||||
Token::Code { lang, raw, terminated } => {
|
|
||||||
if !terminated {
|
|
||||||
error!(@self.feedback, end, "expected backticks");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let lang = lang.and_then(|lang| {
|
let raw = if backticks > 1 {
|
||||||
if let Some(ident) = Ident::new(lang.v) {
|
process_raw(raw)
|
||||||
Some(ident.span_with(lang.span))
|
} else {
|
||||||
} else {
|
Raw {
|
||||||
error!(@self.feedback, lang.span, "invalid identifier");
|
lang: None,
|
||||||
None
|
lines: split_lines(raw),
|
||||||
|
inline: true,
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
|
||||||
let mut lines = unescape_code(raw);
|
self.with_span(SyntaxNode::Raw(raw))
|
||||||
let block = lines.len() > 1;
|
|
||||||
|
|
||||||
if lines.last().map(|s| s.is_empty()).unwrap_or(false) {
|
|
||||||
lines.pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
self.with_span(SyntaxNode::Code(Code { lang, lines, block }))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
|
Token::Text(text) => self.with_span(SyntaxNode::Text(text.to_string())),
|
||||||
|
@ -29,24 +29,17 @@ macro_rules! H {
|
|||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! R {
|
macro_rules! R {
|
||||||
($($line:expr),* $(,)?) => {
|
($lang:expr, $inline:expr, $($line:expr),* $(,)?) => {{
|
||||||
SyntaxNode::Raw(vec![$($line.to_string()),*])
|
SyntaxNode::Raw(Raw {
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
macro_rules! C {
|
|
||||||
($lang:expr, $($line:expr),* $(,)?) => {{
|
|
||||||
let lines = vec![$($line.to_string()) ,*];
|
|
||||||
SyntaxNode::Code(Code {
|
|
||||||
lang: $lang,
|
lang: $lang,
|
||||||
block: lines.len() > 1,
|
lines: vec![$($line.to_string()) ,*],
|
||||||
lines,
|
inline: $inline,
|
||||||
})
|
})
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<Ident>> {
|
fn Lang(lang: &str) -> Option<Ident> {
|
||||||
Some(Into::<Spanned<&str>>::into(lang).map(|s| Ident(s.to_string())))
|
Some(Ident(lang.to_string()))
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! F {
|
macro_rules! F {
|
||||||
@ -220,19 +213,7 @@ fn test_parse_simple_nodes() {
|
|||||||
t!("\\u{1f303}" => T("🌃"));
|
t!("\\u{1f303}" => T("🌃"));
|
||||||
t!("\n\n\nhello" => P, T("hello"));
|
t!("\n\n\nhello" => P, T("hello"));
|
||||||
t!(r"a\ b" => T("a"), L, S, T("b"));
|
t!(r"a\ b" => T("a"), L, S, T("b"));
|
||||||
t!("`py`" => R!["py"]);
|
|
||||||
t!("`hi\nyou" => R!["hi", "you"]);
|
|
||||||
e!("`hi\nyou" => s(7, 7, "expected backtick"));
|
|
||||||
t!("`hi\\`du`" => R!["hi`du"]);
|
|
||||||
|
|
||||||
ts!("```java out```" => s(0, 14, C![Lang(s(3, 7, "java")), "out"]));
|
|
||||||
t!("``` console.log(\n\"alert\"\n)" => C![None, "console.log(", "\"alert\"", ")"]);
|
|
||||||
t!("```typst \r\n Typst uses `\\`` to indicate code blocks" => C![
|
|
||||||
Lang("typst"), " Typst uses ``` to indicate code blocks"
|
|
||||||
]);
|
|
||||||
|
|
||||||
e!("``` hi\nyou" => s(10, 10, "expected backticks"));
|
|
||||||
e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
|
|
||||||
e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence"));
|
e!("\\u{d421c809}" => s(0, 12, "invalid unicode escape sequence"));
|
||||||
e!("\\u{abc" => s(6, 6, "expected closing brace"));
|
e!("\\u{abc" => s(6, 6, "expected closing brace"));
|
||||||
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
|
t!("💜\n\n 🌍" => T("💜"), P, T("🌍"));
|
||||||
@ -242,6 +223,33 @@ fn test_parse_simple_nodes() {
|
|||||||
ts!("💜\n\n 🌍" => s(0, 4, T("💜")), s(4, 7, P), s(7, 11, T("🌍")));
|
ts!("💜\n\n 🌍" => s(0, 4, T("💜")), s(4, 7, P), s(7, 11, T("🌍")));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_raw() {
|
||||||
|
t!("`py`" => R![None, true, "py"]);
|
||||||
|
t!("`hi\nyou" => R![None, true, "hi", "you"]);
|
||||||
|
t!(r"`` hi\`du``" => R![None, true, r"hi\`du"]);
|
||||||
|
|
||||||
|
// More than one backtick with optional language tag.
|
||||||
|
t!("``` console.log(\n\"alert\"\n)" => R![None, false, "console.log(", "\"alert\"", ")"]);
|
||||||
|
t!("````typst \r\n Typst uses ``` to indicate code blocks````!"
|
||||||
|
=> R![Lang("typst"), false, " Typst uses ``` to indicate code blocks"], T("!"));
|
||||||
|
|
||||||
|
// Trimming of whitespace.
|
||||||
|
t!("`` a ``" => R![None, true, "a"]);
|
||||||
|
t!("`` a ``" => R![None, true, "a "]);
|
||||||
|
t!("`` ` ``" => R![None, true, "`"]);
|
||||||
|
t!("``` ` ```" => R![None, true, " ` "]);
|
||||||
|
t!("``` ` \n ```" => R![None, false, " ` "]);
|
||||||
|
|
||||||
|
// Errors.
|
||||||
|
e!("`hi\nyou" => s(7, 7, "expected backtick(s)"));
|
||||||
|
e!("``` hi\nyou" => s(10, 10, "expected backtick(s)"));
|
||||||
|
|
||||||
|
// TODO: Bring back when spans/errors are in place.
|
||||||
|
// ts!("``java out``" => s(0, 12, R![Lang(s(2, 6, "java")), true, "out"]));
|
||||||
|
// e!("```🌍 hi\nyou```" => s(3, 7, "invalid identifier"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_comments() {
|
fn test_parse_comments() {
|
||||||
// In body.
|
// In body.
|
||||||
@ -348,7 +356,7 @@ fn test_parse_function_bodies() {
|
|||||||
e!(" [val][ */]" => s(8, 10, "unexpected end of block comment"));
|
e!(" [val][ */]" => s(8, 10, "unexpected end of block comment"));
|
||||||
|
|
||||||
// Raw in body.
|
// Raw in body.
|
||||||
t!("[val][`Hi]`" => F!("val"; Tree![R!["Hi]"]]));
|
t!("[val][`Hi]`" => F!("val"; Tree![R![None, true, "Hi]"]]));
|
||||||
e!("[val][`Hi]`" => s(11, 11, "expected closing bracket"));
|
e!("[val][`Hi]`" => s(11, 11, "expected closing bracket"));
|
||||||
|
|
||||||
// Crazy.
|
// Crazy.
|
||||||
|
@ -56,7 +56,7 @@ impl<'s> Tokens<'s> {
|
|||||||
/// The position in the string at which the last token ends and next token
|
/// The position in the string at which the last token ends and next token
|
||||||
/// will start.
|
/// will start.
|
||||||
pub fn pos(&self) -> Pos {
|
pub fn pos(&self) -> Pos {
|
||||||
Pos(self.index as u32)
|
self.index.into()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -111,7 +111,7 @@ impl<'s> Iterator for Tokens<'s> {
|
|||||||
|
|
||||||
// Style toggles.
|
// Style toggles.
|
||||||
'_' if self.mode == Body => Underscore,
|
'_' if self.mode == Body => Underscore,
|
||||||
'`' if self.mode == Body => self.read_raw_or_code(),
|
'`' if self.mode == Body => self.read_raw(),
|
||||||
|
|
||||||
// Sections.
|
// Sections.
|
||||||
'#' if self.mode == Body => Hashtag,
|
'#' if self.mode == Body => Hashtag,
|
||||||
@ -230,66 +230,31 @@ impl<'s> Tokens<'s> {
|
|||||||
Str { string, terminated }
|
Str { string, terminated }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_raw_or_code(&mut self) -> Token<'s> {
|
fn read_raw(&mut self) -> Token<'s> {
|
||||||
let (raw, terminated) = self.read_until_unescaped('`');
|
let mut backticks = 1;
|
||||||
if raw.is_empty() && terminated && self.peek() == Some('`') {
|
while self.peek() == Some('`') {
|
||||||
// Third tick found; this is a code block.
|
|
||||||
self.eat();
|
self.eat();
|
||||||
|
backticks += 1;
|
||||||
|
}
|
||||||
|
|
||||||
// Reads the lang tag (until newline or whitespace).
|
let start = self.index;
|
||||||
let start = self.pos();
|
|
||||||
let (lang, _) = self.read_string_until(false, 0, 0, |c| {
|
|
||||||
c == '`' || c.is_whitespace() || is_newline_char(c)
|
|
||||||
});
|
|
||||||
let end = self.pos();
|
|
||||||
|
|
||||||
let lang = if !lang.is_empty() {
|
let mut found = 0;
|
||||||
Some(lang.span_with(Span::new(start, end)))
|
while found < backticks {
|
||||||
} else {
|
match self.eat() {
|
||||||
None
|
Some('`') => found += 1,
|
||||||
};
|
Some(_) => found = 0,
|
||||||
|
None => break,
|
||||||
// Skip to start of raw contents.
|
|
||||||
while let Some(c) = self.peek() {
|
|
||||||
if is_newline_char(c) {
|
|
||||||
self.eat();
|
|
||||||
if c == '\r' && self.peek() == Some('\n') {
|
|
||||||
self.eat();
|
|
||||||
}
|
|
||||||
|
|
||||||
break;
|
|
||||||
} else if c.is_whitespace() {
|
|
||||||
self.eat();
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let start = self.index;
|
let terminated = found == backticks;
|
||||||
let mut backticks = 0u32;
|
let end = self.index - if terminated { found } else { 0 };
|
||||||
|
|
||||||
while backticks < 3 {
|
Raw {
|
||||||
match self.eat() {
|
raw: &self.src[start .. end],
|
||||||
Some('`') => backticks += 1,
|
backticks,
|
||||||
// Escaping of triple backticks.
|
terminated,
|
||||||
Some('\\') if backticks == 1 && self.peek() == Some('`') => {
|
|
||||||
backticks = 0;
|
|
||||||
}
|
|
||||||
Some(_) => {}
|
|
||||||
None => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let terminated = backticks == 3;
|
|
||||||
let end = self.index - if terminated { 3 } else { 0 };
|
|
||||||
|
|
||||||
Code {
|
|
||||||
lang,
|
|
||||||
raw: &self.src[start .. end],
|
|
||||||
terminated,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Raw { raw, terminated }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,18 +434,8 @@ mod tests {
|
|||||||
fn Str(string: &str, terminated: bool) -> Token {
|
fn Str(string: &str, terminated: bool) -> Token {
|
||||||
Token::Str { string, terminated }
|
Token::Str { string, terminated }
|
||||||
}
|
}
|
||||||
fn Raw(raw: &str, terminated: bool) -> Token {
|
fn Raw(raw: &str, backticks: usize, terminated: bool) -> Token {
|
||||||
Token::Raw { raw, terminated }
|
Token::Raw { raw, backticks, terminated }
|
||||||
}
|
|
||||||
fn Code<'a>(
|
|
||||||
lang: Option<Spanned<&'a str>>,
|
|
||||||
raw: &'a str,
|
|
||||||
terminated: bool,
|
|
||||||
) -> Token<'a> {
|
|
||||||
Token::Code { lang, raw, terminated }
|
|
||||||
}
|
|
||||||
fn Lang<'a, T: Into<Spanned<&'a str>>>(lang: T) -> Option<Spanned<&'a str>> {
|
|
||||||
Some(Into::<Spanned<&str>>::into(lang))
|
|
||||||
}
|
}
|
||||||
fn UE(sequence: &str, terminated: bool) -> Token {
|
fn UE(sequence: &str, terminated: bool) -> Token {
|
||||||
Token::UnicodeEscape { sequence, terminated }
|
Token::UnicodeEscape { sequence, terminated }
|
||||||
@ -535,20 +490,32 @@ mod tests {
|
|||||||
t!(Body, "***" => Star, Star, Star);
|
t!(Body, "***" => Star, Star, Star);
|
||||||
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
|
t!(Body, "[func]*bold*" => L, T("func"), R, Star, T("bold"), Star);
|
||||||
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
t!(Body, "hi_you_ there" => T("hi"), Underscore, T("you"), Underscore, S(0), T("there"));
|
||||||
t!(Body, "`raw`" => Raw("raw", true));
|
|
||||||
t!(Body, "# hi" => Hashtag, S(0), T("hi"));
|
t!(Body, "# hi" => Hashtag, S(0), T("hi"));
|
||||||
t!(Body, "#()" => Hashtag, T("()"));
|
t!(Body, "#()" => Hashtag, T("()"));
|
||||||
t!(Body, "`[func]`" => Raw("[func]", true));
|
|
||||||
t!(Body, "`]" => Raw("]", false));
|
|
||||||
t!(Body, "\\ " => Backslash, S(0));
|
|
||||||
t!(Body, "`\\``" => Raw("\\`", true));
|
|
||||||
t!(Body, "``not code`" => Raw("", true), T("not"), S(0), T("code"), Raw("", false));
|
|
||||||
t!(Body, "```rust hi```" => Code(Lang("rust"), "hi", true));
|
|
||||||
t!(Body, "``` hi`\\``" => Code(None, "hi`\\``", false));
|
|
||||||
t!(Body, "```js \r\n document.write(\"go\")" => Code(Lang("js"), " document.write(\"go\")", false));
|
|
||||||
t!(Header, "_`" => Invalid("_`"));
|
t!(Header, "_`" => Invalid("_`"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_tokenize_raw() {
|
||||||
|
// Basics.
|
||||||
|
t!(Body, "`raw`" => Raw("raw", 1, true));
|
||||||
|
t!(Body, "`[func]`" => Raw("[func]", 1, true));
|
||||||
|
t!(Body, "`]" => Raw("]", 1, false));
|
||||||
|
t!(Body, r"`\`` " => Raw(r"\", 1, true), Raw(" ", 1, false));
|
||||||
|
|
||||||
|
// Language tag.
|
||||||
|
t!(Body, "``` hi```" => Raw(" hi", 3, true));
|
||||||
|
t!(Body, "```rust hi```" => Raw("rust hi", 3, true));
|
||||||
|
t!(Body, r"``` hi\````" => Raw(r" hi\", 3, true), Raw("", 1, false));
|
||||||
|
t!(Body, "``` not `y`e`t finished```" => Raw(" not `y`e`t finished", 3, true));
|
||||||
|
t!(Body, "```js \r\n document.write(\"go\")`"
|
||||||
|
=> Raw("js \r\n document.write(\"go\")`", 3, false));
|
||||||
|
|
||||||
|
// More backticks.
|
||||||
|
t!(Body, "`````` ``````hi" => Raw(" ", 6, true), T("hi"));
|
||||||
|
t!(Body, "````\n```js\nalert()\n```\n````" => Raw("\n```js\nalert()\n```\n", 4, true));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn tokenize_header_only_tokens() {
|
fn tokenize_header_only_tokens() {
|
||||||
t!(Body, "a: b" => T("a:"), S(0), T("b"));
|
t!(Body, "a: b" => T("a:"), S(0), T("b"));
|
||||||
|
@ -189,6 +189,12 @@ impl From<u32> for Pos {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<usize> for Pos {
|
||||||
|
fn from(index: usize) -> Self {
|
||||||
|
Self(index as u32)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Offset for Pos {
|
impl Offset for Pos {
|
||||||
fn offset(self, by: Self) -> Self {
|
fn offset(self, by: Self) -> Self {
|
||||||
Pos(self.0 + by.0)
|
Pos(self.0 + by.0)
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
//! Tokenization.
|
//! Tokenization.
|
||||||
|
|
||||||
use super::span::Spanned;
|
|
||||||
use crate::length::Length;
|
use crate::length::Length;
|
||||||
|
|
||||||
/// A minimal semantic entity of source code.
|
/// A minimal semantic entity of source code.
|
||||||
@ -86,21 +85,13 @@ pub enum Token<'s> {
|
|||||||
terminated: bool,
|
terminated: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Raw text.
|
/// Raw block.
|
||||||
Raw {
|
Raw {
|
||||||
/// The raw text (not yet unescaped as for strings).
|
/// The raw text between the backticks.
|
||||||
raw: &'s str,
|
raw: &'s str,
|
||||||
/// Whether the closing backtick was present.
|
/// The number of opening backticks.
|
||||||
terminated: bool,
|
backticks: usize,
|
||||||
},
|
/// Whether all closing backticks were present.
|
||||||
|
|
||||||
/// Multi-line code block.
|
|
||||||
Code {
|
|
||||||
/// The language of the code block, if specified.
|
|
||||||
lang: Option<Spanned<&'s str>>,
|
|
||||||
/// The raw text (not yet unescaped as for strings).
|
|
||||||
raw: &'s str,
|
|
||||||
/// Whether the closing backticks were present.
|
|
||||||
terminated: bool,
|
terminated: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
@ -142,8 +133,7 @@ impl<'s> Token<'s> {
|
|||||||
Self::Backslash => "backslash",
|
Self::Backslash => "backslash",
|
||||||
Self::Hashtag => "hashtag",
|
Self::Hashtag => "hashtag",
|
||||||
Self::UnicodeEscape { .. } => "unicode escape sequence",
|
Self::UnicodeEscape { .. } => "unicode escape sequence",
|
||||||
Self::Raw { .. } => "raw text",
|
Self::Raw { .. } => "raw block",
|
||||||
Self::Code { .. } => "code block",
|
|
||||||
Self::Text(_) => "text",
|
Self::Text(_) => "text",
|
||||||
Self::Invalid("*/") => "end of block comment",
|
Self::Invalid("*/") => "end of block comment",
|
||||||
Self::Invalid(_) => "invalid token",
|
Self::Invalid(_) => "invalid token",
|
||||||
|
@ -31,16 +31,93 @@ pub enum SyntaxNode {
|
|||||||
ToggleBolder,
|
ToggleBolder,
|
||||||
/// Plain text.
|
/// Plain text.
|
||||||
Text(String),
|
Text(String),
|
||||||
|
/// An optionally syntax-highlighted raw block.
|
||||||
|
Raw(Raw),
|
||||||
/// Section headings.
|
/// Section headings.
|
||||||
Heading(Heading),
|
Heading(Heading),
|
||||||
/// Lines of raw text.
|
|
||||||
Raw(Vec<String>),
|
|
||||||
/// An optionally highlighted (multi-line) code block.
|
|
||||||
Code(Code),
|
|
||||||
/// A function call.
|
/// A function call.
|
||||||
Call(CallExpr),
|
Call(CallExpr),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A raw block, rendered in monospace with optional syntax highlighting.
|
||||||
|
///
|
||||||
|
/// Raw blocks start with an arbitrary number of backticks and end with the same
|
||||||
|
/// number of backticks. If you want to include a sequence of backticks in a raw
|
||||||
|
/// block, simply surround the block with more backticks.
|
||||||
|
///
|
||||||
|
/// When using at least two backticks, an optional language tag may follow
|
||||||
|
/// directly after the backticks. This tag defines which language to
|
||||||
|
/// syntax-highlight the text in. Apart from the language tag and some
|
||||||
|
/// whitespace trimming discussed below, everything inside a raw block is
|
||||||
|
/// rendered verbatim, in particular, there are no escape sequences.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
/// - Raw text is surrounded by backticks.
|
||||||
|
/// ```typst
|
||||||
|
/// `raw`
|
||||||
|
/// ```
|
||||||
|
/// - An optional language tag may follow directly at the start when the block
|
||||||
|
/// is surrounded by at least two backticks.
|
||||||
|
/// ```typst
|
||||||
|
/// ``rust println!("hello!")``;
|
||||||
|
/// ```
|
||||||
|
/// - Blocks can span multiple lines. Two backticks suffice to be able to
|
||||||
|
/// specify the language tag, but three are fine, too.
|
||||||
|
/// ```typst
|
||||||
|
/// ``rust
|
||||||
|
/// loop {
|
||||||
|
/// find_yak().shave();
|
||||||
|
/// }
|
||||||
|
/// ``
|
||||||
|
/// ```
|
||||||
|
/// - Start with a space to omit the language tag (the space will be trimmed
|
||||||
|
/// from the output) and use more backticks to allow backticks in the raw
|
||||||
|
/// text.
|
||||||
|
/// `````typst
|
||||||
|
/// ```` This contains ```backticks``` and has no leading & trailing spaces. ````
|
||||||
|
/// `````
|
||||||
|
///
|
||||||
|
/// # Trimming
|
||||||
|
/// If we always rendered the raw text between the backticks exactly as
|
||||||
|
/// given, a few things would become problematic or even impossible:
|
||||||
|
/// - Typical multiline code blocks (like in the example above) would have an
|
||||||
|
/// additional newline before and after the code.
|
||||||
|
/// - Raw text wrapped in more than one backtick could not exist without
|
||||||
|
/// leading whitespace since the first word would be interpreted as a
|
||||||
|
/// language tag.
|
||||||
|
/// - A single backtick without surrounding spaces could not exist as raw text
|
||||||
|
/// since it would be interpreted as belonging to the opening or closing
|
||||||
|
/// backticks.
|
||||||
|
///
|
||||||
|
/// To fix these problems, we trim text in multi-backtick blocks as follows:
|
||||||
|
/// - We trim a single space or a sequence of whitespace followed by a newline
|
||||||
|
/// at the start.
|
||||||
|
/// - We trim a single space or a newline followed by a sequence of whitespace
|
||||||
|
/// at the end.
|
||||||
|
///
|
||||||
|
/// With these rules, a single raw backtick can be produced by the sequence
|
||||||
|
/// ``` `` ` `` ```, ``` `` unhighlighted text `` ``` has no surrounding
|
||||||
|
/// spaces and multiline code blocks don't have extra empty lines. Note that
|
||||||
|
/// you can always force leading or trailing whitespace simply by adding more
|
||||||
|
/// spaces.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct Raw {
|
||||||
|
/// An optional identifier specifying the language to syntax-highlight in.
|
||||||
|
pub lang: Option<Ident>,
|
||||||
|
/// The lines of raw text, determined as the raw string between the
|
||||||
|
/// backticks trimmed according to the above rules and split at newlines.
|
||||||
|
pub lines: Vec<String>,
|
||||||
|
/// Whether the element can be layouted inline.
|
||||||
|
///
|
||||||
|
/// - When true, it will be layouted integrated within the surrounding
|
||||||
|
/// paragraph.
|
||||||
|
/// - When false, it will be separated into its own paragraph.
|
||||||
|
///
|
||||||
|
/// Single-backtick blocks are always inline-level. Multi-backtick blocks
|
||||||
|
/// are inline-level when they contain no newlines.
|
||||||
|
pub inline: bool,
|
||||||
|
}
|
||||||
|
|
||||||
/// A section heading.
|
/// A section heading.
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct Heading {
|
pub struct Heading {
|
||||||
@ -49,14 +126,6 @@ pub struct Heading {
|
|||||||
pub tree: SyntaxTree,
|
pub tree: SyntaxTree,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A code block.
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
|
||||||
pub struct Code {
|
|
||||||
pub lang: Option<Spanned<Ident>>,
|
|
||||||
pub lines: Vec<String>,
|
|
||||||
pub block: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An expression.
|
/// An expression.
|
||||||
#[derive(Clone, PartialEq)]
|
#[derive(Clone, PartialEq)]
|
||||||
pub enum Expr {
|
pub enum Expr {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user