diff --git a/src/layout/model.rs b/src/layout/model.rs index 7e899f2e3..2800774a4 100644 --- a/src/layout/model.rs +++ b/src/layout/model.rs @@ -164,7 +164,8 @@ impl<'a> ModelLayouter<'a> { match node { Space => self.layout_space(), - Newline => self.layout_paragraph(), + Parbreak => self.layout_paragraph(), + Linebreak => self.layouter.finish_line(), Text(text) => { if self.style.text.variant.style == FontStyle::Italic { @@ -175,10 +176,6 @@ impl<'a> ModelLayouter<'a> { decorate(self, Decoration::Bold); } - if self.style.text.monospace { - decorate(self, Decoration::Monospace); - } - self.layout_text(text).await; } @@ -192,12 +189,28 @@ impl<'a> ModelLayouter<'a> { decorate(self, Decoration::Bold); } - ToggleMonospace => { - self.style.text.monospace = !self.style.text.monospace; - decorate(self, Decoration::Monospace); + Raw(lines) => { + // TODO: Make this more efficient. + let fallback = self.style.text.fallback.clone(); + self.style.text.fallback.list.insert(0, "monospace".to_string()); + self.style.text.fallback.flatten(); + + // Layout the first line. + let mut iter = lines.iter(); + if let Some(line) = iter.next() { + self.layout_text(line).await; + } + + // Put a newline before each following line. + for line in iter { + self.layouter.finish_line(); + self.layout_text(line).await; + } + + self.style.text.fallback = fallback; } - Node::Model(model) => { + Model(model) => { self.layout(Spanned::new(model.as_ref(), *span)).await; } } diff --git a/src/layout/text.rs b/src/layout/text.rs index 614d59fd0..286ccc68c 100644 --- a/src/layout/text.rs +++ b/src/layout/text.rs @@ -118,23 +118,13 @@ impl<'a> TextLayouter<'a> { variant.weight.0 += 300; } - let queried = if self.ctx.style.monospace { - loader.get(FontQuery { - // FIXME: This is a hack. - fallback: std::iter::once("source code pro") - .chain(self.ctx.style.fallback.iter()), - variant, - c, - }).await - } else { - loader.get(FontQuery { - fallback: self.ctx.style.fallback.iter(), - variant, - c, - }).await + let query = FontQuery { + fallback: self.ctx.style.fallback.iter(), + variant, + c, }; - if let Some((font, index)) = queried { + if let Some((font, index)) = loader.get(query).await { // Determine the width of the char. let header = font.read_table::
().ok()?; let font_unit_ratio = 1.0 / (header.units_per_em as f32); diff --git a/src/library/font.rs b/src/library/font.rs index 9c69e1ddc..ba778693f 100644 --- a/src/library/font.rs +++ b/src/library/font.rs @@ -9,21 +9,45 @@ function! { pub struct FontFamilyFunc { body: Option, list: Vec, + classes: Vec<(String, Vec)>, } parse(header, body, ctx, f) { + let list = header.args.pos.get_all::(&mut f.errors) + .map(|s| s.0.to_lowercase()) + .collect(); + + let tuples: Vec<_> = header.args.key + .get_all::(&mut f.errors) + .collect(); + + let classes = tuples.into_iter() + .map(|(class, mut tuple)| { + let fallback = tuple.get_all::(&mut f.errors) + .map(|s| s.0.to_lowercase()) + .collect(); + (class.to_lowercase(), fallback) + }) + .collect(); + FontFamilyFunc { body: body!(opt: body, ctx, f), - list: header.args.pos.get_all::(&mut f.errors) - .map(|s| s.0.to_lowercase()) - .collect(), + list, + classes, } } layout(self, ctx, errors) { - styled(&self.body, ctx, Some(&self.list), - |s, list| { - s.fallback.list = list.clone(); + styled(&self.body, ctx, Some(()), + |s, _| { + if !self.list.is_empty() { + s.fallback.list = self.list.clone(); + } + + for (class, fallback) in &self.classes { + s.fallback.set_class_list(class.clone(), fallback.clone()); + } + s.fallback.flatten(); }) } diff --git a/src/style.rs b/src/style.rs index 57d6b6f3b..075baa5ac 100644 --- a/src/style.rs +++ b/src/style.rs @@ -24,8 +24,6 @@ pub struct TextStyle { /// Whether the bolder toggle is active or inactive. This determines /// whether the next `*` adds or removes font weight. pub bolder: bool, - /// Whether the monospace toggle is active or inactive. - pub monospace: bool, /// The base font size. pub base_font_size: Size, /// The font scale to apply on the base font size. @@ -79,7 +77,6 @@ impl Default for TextStyle { weight: FontWeight(400), }, bolder: false, - monospace: false, base_font_size: Size::pt(11.0), font_scale: 1.0, word_spacing_scale: 0.25, diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index c3a57ee60..f7321c77e 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -62,15 +62,17 @@ pub enum Node { /// Whitespace containing less than two newlines. Space, /// Whitespace with more than two newlines. - Newline, + Parbreak, + /// A forced line break. + Linebreak, /// Plain text. Text(String), + /// Lines of raw text. + Raw(Vec), /// Italics were enabled / disabled. ToggleItalic, /// Bolder was enabled / disabled. ToggleBolder, - /// Monospace was enabled / disabled. - ToggleMonospace, /// A submodel, typically a function invocation. Model(Box), } @@ -80,11 +82,12 @@ impl PartialEq for Node { use Node::*; match (self, other) { (Space, Space) => true, - (Newline, Newline) => true, + (Parbreak, Parbreak) => true, + (Linebreak, Linebreak) => true, (Text(a), Text(b)) => a == b, + (Raw(a), Raw(b)) => a == b, (ToggleItalic, ToggleItalic) => true, (ToggleBolder, ToggleBolder) => true, - (ToggleMonospace, ToggleMonospace) => true, (Model(a), Model(b)) => a == b, _ => false, } @@ -107,6 +110,7 @@ pub enum Decoration { /// ^^^^^^ /// ``` InvalidFuncName, + /// A key of a keyword argument: /// ```typst /// [box: width=5cm] @@ -119,12 +123,11 @@ pub enum Decoration { /// ^^^^ ^^^^^ /// ``` ObjectKey, + /// An italic word. Italic, /// A bold word. Bold, - /// A monospace word. - Monospace, } impl dyn Model { diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs index a991c8281..2fb8b58ac 100644 --- a/src/syntax/parsing.rs +++ b/src/syntax/parsing.rs @@ -33,10 +33,12 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass let span = token.span; let node = match token.v { + Token::LineComment(_) | Token::BlockComment(_) => continue, + // Only at least two newlines mean a _real_ newline indicating a // paragraph break. Token::Space(newlines) => if newlines >= 2 { - Node::Newline + Node::Parbreak } else { Node::Space }, @@ -55,10 +57,18 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass Token::Star => Node::ToggleBolder, Token::Underscore => Node::ToggleItalic, - Token::Backtick => Node::ToggleMonospace, - Token::Text(text) => Node::Text(text.to_string()), + Token::Backslash => Node::Linebreak, - Token::LineComment(_) | Token::BlockComment(_) => continue, + Token::Raw { raw, terminated } => { + if !terminated { + feedback.errors.push(err!(Span::at(span.end); + "expected backtick")); + } + + Node::Raw(unescape_raw(raw)) + } + + Token::Text(text) => Node::Text(text.to_string()), other => { feedback.errors.push(err!(span; "unexpected {}", other.name())); @@ -219,7 +229,7 @@ impl<'s> FuncParser<'s> { self.expected_at("quote", first.span.end); } - take!(Expr::Str(unescape(string))) + take!(Expr::Str(unescape_string(string))) } Token::ExprNumber(n) => take!(Expr::Number(n)), @@ -433,36 +443,57 @@ impl<'s> FuncParser<'s> { } } -/// Unescape a string. -fn unescape(string: &str) -> String { +/// Unescape a string: `the string is \"this\"` => `the string is "this"`. +fn unescape_string(string: &str) -> String { let mut s = String::with_capacity(string.len()); - let mut escaped = false; + let mut iter = string.chars(); - for c in string.chars() { + while let Some(c) = iter.next() { if c == '\\' { - if escaped { - s.push('\\'); + match iter.next() { + Some('\\') => s.push('\\'), + Some('"') => s.push('"'), + Some('n') => s.push('\n'), + Some('t') => s.push('\t'), + Some(c) => { s.push('\\'); s.push(c); } + None => s.push('\\'), } - escaped = !escaped; } else { - if escaped { - match c { - '"' => s.push('"'), - 'n' => s.push('\n'), - 't' => s.push('\t'), - c => { s.push('\\'); s.push(c); } - } - } else { - s.push(c); - } - - escaped = false; + s.push(c); } } s } +/// Unescape raw markup into lines. +fn unescape_raw(raw: &str) -> Vec { + let mut lines = Vec::new(); + let mut s = String::new(); + let mut iter = raw.chars().peekable(); + + while let Some(c) = iter.next() { + if c == '\\' { + match iter.next() { + Some('`') => s.push('`'), + Some(c) => { s.push('\\'); s.push(c); } + None => s.push('\\'), + } + } else if is_newline_char(c) { + if c == '\r' && iter.peek() == Some(&'\n') { + iter.next(); + } + + lines.push(std::mem::replace(&mut s, String::new())); + } else { + s.push(c); + } + } + + lines.push(s); + lines +} + #[cfg(test)] #[allow(non_snake_case)] @@ -474,8 +505,8 @@ mod tests { use Decoration::*; use Node::{ - Space as S, Newline as N, - ToggleItalic as Italic, ToggleBolder as Bold, ToggleMonospace as Mono, + Space as S, ToggleItalic as Italic, ToggleBolder as Bold, + Parbreak, Linebreak, }; use Expr::{Number as Num, Size as Sz, Bool}; @@ -484,6 +515,13 @@ mod tests { fn Pt(points: f32) -> Expr { Expr::Size(Size::pt(points)) } fn T(text: &str) -> Node { Node::Text(text.to_string()) } + /// Create a raw text node. + macro_rules! raw { + ($($line:expr),* $(,)?) => { + Node::Raw(vec![$($line.to_string()),*]) + }; + } + /// Create a tuple expression. macro_rules! tuple { ($($items:expr),* $(,)?) => { @@ -568,7 +606,7 @@ mod tests { #[test] fn unescape_strings() { fn test(string: &str, expected: &str) { - assert_eq!(unescape(string), expected.to_string()); + assert_eq!(unescape_string(string), expected.to_string()); } test(r#"hello world"#, "hello world"); @@ -577,24 +615,49 @@ mod tests { test(r#"a\\"#, "a\\"); test(r#"a\\\nbc"#, "a\\\nbc"); test(r#"a\tbc"#, "a\tbc"); - test("🌎", "🌎"); + test(r"🌎", "🌎"); + test(r"🌎\", r"🌎\"); + test(r"\🌎", r"\🌎"); } #[test] - fn parse_flat_nodes() { + fn unescape_raws() { + fn test(raw: &str, expected: Node) { + let vec = if let Node::Raw(v) = expected { v } else { panic!() }; + assert_eq!(unescape_raw(raw), vec); + } + + test("raw\\`", raw!["raw`"]); + test("raw\ntext", raw!["raw", "text"]); + test("a\r\nb", raw!["a", "b"]); + test("a\n\nb", raw!["a", "", "b"]); + test("a\r\x0Bb", raw!["a", "", "b"]); + test("a\r\n\r\nb", raw!["a", "", "b"]); + test("raw\\a", raw!["raw\\a"]); + test("raw\\", raw!["raw\\"]); + } + + #[test] + fn parse_basic_nodes() { // Basic nodes p!("" => []); p!("hi" => [T("hi")]); p!("*hi" => [Bold, T("hi")]); p!("hi_" => [T("hi"), Italic]); - p!("`py`" => [Mono, T("py"), Mono]); p!("hi you" => [T("hi"), S, T("you")]); p!("hi// you\nw" => [T("hi"), S, T("w")]); - p!("\n\n\nhello" => [N, T("hello")]); + p!("\n\n\nhello" => [Parbreak, T("hello")]); p!("first//\n//\nsecond" => [T("first"), S, S, T("second")]); - p!("first//\n \nsecond" => [T("first"), N, T("second")]); + p!("first//\n \nsecond" => [T("first"), Parbreak, T("second")]); p!("first/*\n \n*/second" => [T("first"), T("second")]); - p!("💜\n\n 🌍" => [T("💜"), N, T("🌍")]); + p!(r"a\ b" => [T("a"), Linebreak, S, T("b")]); + p!("💜\n\n 🌍" => [T("💜"), Parbreak, T("🌍")]); + + // Raw markup + p!("`py`" => [raw!["py"]]); + p!("[val][`hi]`]" => [func!("val"; [raw!["hi]"]])]); + p!("`hi\nyou" => [raw!["hi", "you"]], [(1:3, 1:3, "expected backtick")]); + p!("`hi\\`du`" => [raw!["hi`du"]]); // Spanned nodes p!("Hi" => [(0:0, 0:2, T("Hi"))]); @@ -924,7 +987,7 @@ mod tests { // Newline before function p!(" \n\r\n[val]" => - [(0:0, 2:0, N), (2:0, 2:5, func!((0:1, 0:4, "val")))], [], + [(0:0, 2:0, Parbreak), (2:0, 2:5, func!((0:1, 0:4, "val")))], [], [(2:1, 2:4, ValidFuncName)], ); diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs index d78938e3c..cc65d9937 100644 --- a/src/syntax/tokens.rs +++ b/src/syntax/tokens.rs @@ -83,8 +83,17 @@ pub enum Token<'s> { Star, /// An underscore in body-text. Underscore, - /// A backtick in body-text. - Backtick, + + /// A backslash followed by whitespace in text. + Backslash, + + /// Raw text. + Raw { + /// The raw text (not yet unescaped as for strings). + raw: &'s str, + /// Whether the closing backtick was present. + terminated: bool, + }, /// Any other consecutive string. Text(&'s str), @@ -115,8 +124,9 @@ impl<'s> Token<'s> { ExprBool(_) => "bool", Star => "star", Underscore => "underscore", - Backtick => "backtick", - Text(_) => "invalid identifier", + Backslash => "backslash", + Raw { .. } => "raw text", + Text(_) => "text", Invalid("]") => "closing bracket", Invalid("*/") => "end of block comment", Invalid(_) => "invalid token", @@ -206,7 +216,7 @@ impl<'s> Iterator for Tokens<'s> { // Style toggles. '*' if self.mode == Body => Star, '_' if self.mode == Body => Underscore, - '`' if self.mode == Body => Backtick, + '`' if self.mode == Body => self.parse_raw(), // An escaped thing. '\\' if self.mode == Body => self.parse_escaped(), @@ -281,7 +291,7 @@ impl<'s> Tokens<'s> { } fn parse_function(&mut self, start: Position) -> Token<'s> { - let (header, terminated) = self.read_function_part(); + let (header, terminated) = self.read_function_part(Header); self.eat(); if self.peek() != Some('[') { @@ -291,7 +301,7 @@ impl<'s> Tokens<'s> { self.eat(); let body_start = self.pos() - start; - let (body, terminated) = self.read_function_part(); + let (body, terminated) = self.read_function_part(Body); let body_end = self.pos() - start; let span = Span::new(body_start, body_end); @@ -300,60 +310,73 @@ impl<'s> Tokens<'s> { Function { header, body: Some(Spanned { v: body, span }), terminated } } - fn read_function_part(&mut self) -> (&'s str, bool) { - let mut escaped = false; - let mut in_string = false; - let mut depth = 0; + fn read_function_part(&mut self, mode: TokenizationMode) -> (&'s str, bool) { + let start = self.index(); + let mut terminated = false; - self.read_string_until(|n| { - match n { - '"' if !escaped => in_string = !in_string, - '[' if !escaped && !in_string => depth += 1, - ']' if !escaped && !in_string => { - if depth == 0 { - return true; - } else { - depth -= 1; - } - } - '\\' => escaped = !escaped, - _ => escaped = false, + while let Some(n) = self.peek() { + if n == ']' { + terminated = true; + break; } - false - }, false, 0, 0) + self.eat(); + match n { + '[' => { self.parse_function(Position::ZERO); } + '/' if self.peek() == Some('/') => { self.parse_line_comment(); } + '/' if self.peek() == Some('*') => { self.parse_block_comment(); } + '"' if mode == Header => { self.parse_string(); } + '`' if mode == Body => { self.parse_raw(); } + '\\' => { self.eat(); } + _ => {} + } + } + + let end = self.index(); + (&self.src[start .. end], terminated) } fn parse_string(&mut self) -> Token<'s> { + let (string, terminated) = self.read_until_unescaped('"'); + ExprStr { string, terminated } + } + + fn parse_raw(&mut self) -> Token<'s> { + let (raw, terminated) = self.read_until_unescaped('`'); + Raw { raw, terminated } + } + + fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) { let mut escaped = false; - let (string, terminated) = self.read_string_until(|n| { + self.read_string_until(|n| { match n { - '"' if !escaped => return true, + n if n == c && !escaped => return true, '\\' => escaped = !escaped, _ => escaped = false, } false - }, true, 0, -1); - ExprStr { string, terminated } + }, true, 0, -1) } fn parse_escaped(&mut self) -> Token<'s> { fn is_escapable(c: char) -> bool { match c { - '[' | ']' | '\\' | '/' | '*' | '_' | '`' => true, + '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' => true, _ => false, } } - Text(match self.peek() { + match self.peek() { Some(c) if is_escapable(c) => { let index = self.index(); self.eat(); - &self.src[index .. index + c.len_utf8()] + Text(&self.src[index .. index + c.len_utf8()]) } - _ => "\\" - }) + Some(c) if c.is_whitespace() => Backslash, + Some(_) => Text("\\"), + None => Backslash, + } } fn parse_expr(&mut self, text: &'s str) -> Token<'s> { @@ -462,6 +485,7 @@ pub fn is_identifier(string: &str) -> bool { true } + #[cfg(test)] mod tests { use super::super::test::check; @@ -483,6 +507,11 @@ mod tests { Token::ExprStr { string, terminated } } + #[allow(non_snake_case)] + fn Raw(raw: &'static str, terminated: bool) -> Token<'static> { + Token::Raw { raw, terminated } + } + /// Test whether the given string tokenizes into the given list of tokens. macro_rules! t { ($mode:expr, $source:expr => [$($tokens:tt)*]) => { @@ -540,10 +569,15 @@ mod tests { #[test] fn tokenize_body_only_tokens() { - t!(Body, "_*`" => [Underscore, Star, Backtick]); + t!(Body, "_*" => [Underscore, Star]); t!(Body, "***" => [Star, Star, Star]); t!(Body, "[func]*bold*" => [func!("func", None, true), Star, T("bold"), Star]); t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]); + t!(Body, "`raw`" => [Raw("raw", true)]); + t!(Body, "`[func]`" => [Raw("[func]", true)]); + t!(Body, "`]" => [Raw("]", false)]); + t!(Body, "`\\``" => [Raw("\\`", true)]); + t!(Body, "\\ " => [Backslash, S(0)]); t!(Header, "_*`" => [Invalid("_*`")]); } @@ -598,15 +632,46 @@ mod tests { t!(Header, "]" => [Invalid("]")]); } + #[test] + fn tokenize_correct_end_of_function() { + // End of function with strings and carets in headers + t!(Body, r#"[f: "]"# => [func!(r#"f: "]"#, None, false)]); + t!(Body, "[f: \"s\"]" => [func!("f: \"s\"", None, true)]); + t!(Body, r#"[f: \"\"\"]"# => [func!(r#"f: \"\"\""#, None, true)]); + t!(Body, "[f: `]" => [func!("f: `", None, true)]); + + // End of function with strings and carets in bodies + t!(Body, "[f][\"]" => [func!("f", Some((0:4, 0:5, "\"")), true)]); + t!(Body, r#"[f][\"]"# => [func!("f", Some((0:4, 0:6, r#"\""#)), true)]); + t!(Body, "[f][`]" => [func!("f", Some((0:4, 0:6, "`]")), false)]); + t!(Body, "[f][\\`]" => [func!("f", Some((0:4, 0:6, "\\`")), true)]); + t!(Body, "[f][`raw`]" => [func!("f", Some((0:4, 0:9, "`raw`")), true)]); + t!(Body, "[f][`raw]" => [func!("f", Some((0:4, 0:9, "`raw]")), false)]); + t!(Body, "[f][`raw]`]" => [func!("f", Some((0:4, 0:10, "`raw]`")), true)]); + t!(Body, "[f][`\\`]" => [func!("f", Some((0:4, 0:8, "`\\`]")), false)]); + t!(Body, "[f][`\\\\`]" => [func!("f", Some((0:4, 0:8, "`\\\\`")), true)]); + + // End of function with comments + t!(Body, "[f][/*]" => [func!("f", Some((0:4, 0:7, "/*]")), false)]); + t!(Body, "[f][/*`*/]" => [func!("f", Some((0:4, 0:9, "/*`*/")), true)]); + t!(Body, "[f: //]\n]" => [func!("f: //]\n", None, true)]); + t!(Body, "[f: \"//]\n]" => [func!("f: \"//]\n]", None, false)]); + + // End of function with escaped brackets + t!(Body, "[f][\\]]" => [func!("f", Some((0:4, 0:6, "\\]")), true)]); + t!(Body, "[f][\\[]" => [func!("f", Some((0:4, 0:6, "\\[")), true)]); + } + #[test] fn tokenize_escaped_symbols() { - t!(Body, r"\\" => [T(r"\")]); - t!(Body, r"\[" => [T("[")]); - t!(Body, r"\]" => [T("]")]); - t!(Body, r"\*" => [T("*")]); - t!(Body, r"\_" => [T("_")]); - t!(Body, r"\`" => [T("`")]); - t!(Body, r"\/" => [T("/")]); + t!(Body, r"\\" => [T(r"\")]); + t!(Body, r"\[" => [T("[")]); + t!(Body, r"\]" => [T("]")]); + t!(Body, r"\*" => [T("*")]); + t!(Body, r"\_" => [T("_")]); + t!(Body, r"\`" => [T("`")]); + t!(Body, r"\/" => [T("/")]); + t!(Body, r#"\""# => [T("\"")]); } #[test]