New syntax features 👔

- Forced line breaks with backslash followed by whitespace - (Multiline) raw text in backticks - Set font class fallbacks with [font.family] (e.g. [font.family: monospace=("CMU Typewriter Text")]) - More sophisticated procedure to find end of function, which accounts for comments, strings, raw text and nested functions (this is a mix of a feature and a bug fix)
2025-07-13 15:42:53 +08:00 · 2020-02-13 21:58:49 +01:00 · 2020-02-13 21:58:49 +01:00 · 1658b00282
commit 1658b00282
parent 60099aed50
7 changed files with 272 additions and 117 deletions
--- a/src/layout/model.rs
+++ b/src/layout/model.rs
@ -164,7 +164,8 @@ impl<'a> ModelLayouter<'a> {
            match node {
                Space => self.layout_space(),
-                Newline => self.layout_paragraph(),
+                Parbreak => self.layout_paragraph(),
                Linebreak => self.layouter.finish_line(),
                Text(text) => {
                    if self.style.text.variant.style == FontStyle::Italic {
@ -175,10 +176,6 @@ impl<'a> ModelLayouter<'a> {
                        decorate(self, Decoration::Bold);
                    }
                    if self.style.text.monospace {
                        decorate(self, Decoration::Monospace);
                    }
                    self.layout_text(text).await;
                }
@ -192,12 +189,28 @@ impl<'a> ModelLayouter<'a> {
                    decorate(self, Decoration::Bold);
                }
-                ToggleMonospace => {
+                Raw(lines) => {
-                    self.style.text.monospace = !self.style.text.monospace;
+                    // TODO: Make this more efficient.
-                    decorate(self, Decoration::Monospace);
+                    let fallback = self.style.text.fallback.clone();
                    self.style.text.fallback.list.insert(0, "monospace".to_string());
                    self.style.text.fallback.flatten();
                    // Layout the first line.
                    let mut iter = lines.iter();
                    if let Some(line) = iter.next() {
                        self.layout_text(line).await;
                    }
-                Node::Model(model) => {
+                    // Put a newline before each following line.
                    for line in iter {
                        self.layouter.finish_line();
                        self.layout_text(line).await;
                    }
                    self.style.text.fallback = fallback;
                }
                Model(model) => {
                    self.layout(Spanned::new(model.as_ref(), *span)).await;
                }
            }
--- a/src/layout/text.rs
+++ b/src/layout/text.rs
@ -118,23 +118,13 @@ impl<'a> TextLayouter<'a> {
            variant.weight.0 += 300;
        }
-        let queried = if self.ctx.style.monospace {
+        let query = FontQuery {
            loader.get(FontQuery {
                // FIXME: This is a hack.
                fallback: std::iter::once("source code pro")
                    .chain(self.ctx.style.fallback.iter()),
                variant,
                c,
            }).await
        } else {
            loader.get(FontQuery {
            fallback: self.ctx.style.fallback.iter(),
            variant,
            c,
            }).await
        };
-        if let Some((font, index)) = queried {
+        if let Some((font, index)) = loader.get(query).await {
            // Determine the width of the char.
            let header = font.read_table::<Header>().ok()?;
            let font_unit_ratio = 1.0 / (header.units_per_em as f32);
--- a/src/library/font.rs
+++ b/src/library/font.rs
@ -9,21 +9,45 @@ function! {
    pub struct FontFamilyFunc {
        body: Option<SyntaxModel>,
        list: Vec<String>,
        classes: Vec<(String, Vec<String>)>,
    }
    parse(header, body, ctx, f) {
        let list = header.args.pos.get_all::<StringLike>(&mut f.errors)
            .map(|s| s.0.to_lowercase())
            .collect();
        let tuples: Vec<_> = header.args.key
            .get_all::<String, Tuple>(&mut f.errors)
            .collect();
        let classes = tuples.into_iter()
            .map(|(class, mut tuple)| {
                let fallback = tuple.get_all::<StringLike>(&mut f.errors)
                    .map(|s| s.0.to_lowercase())
                    .collect();
                (class.to_lowercase(), fallback)
            })
            .collect();
        FontFamilyFunc {
            body: body!(opt: body, ctx, f),
-            list: header.args.pos.get_all::<StringLike>(&mut f.errors)
+            list,
-                .map(|s| s.0.to_lowercase())
+            classes,
                .collect(),
        }
    }
    layout(self, ctx, errors) {
-        styled(&self.body, ctx, Some(&self.list),
+        styled(&self.body, ctx, Some(()),
-            |s, list| {
+            |s, _| {
-                s.fallback.list = list.clone();
+                if !self.list.is_empty() {
                    s.fallback.list = self.list.clone();
                }
                for (class, fallback) in &self.classes {
                    s.fallback.set_class_list(class.clone(), fallback.clone());
                }
                s.fallback.flatten();
            })
    }
--- a/src/style.rs
+++ b/src/style.rs
@ -24,8 +24,6 @@ pub struct TextStyle {
    /// Whether the bolder toggle is active or inactive. This determines
    /// whether the next `*` adds or removes font weight.
    pub bolder: bool,
    /// Whether the monospace toggle is active or inactive.
    pub monospace: bool,
    /// The base font size.
    pub base_font_size: Size,
    /// The font scale to apply on the base font size.
@ -79,7 +77,6 @@ impl Default for TextStyle {
                weight: FontWeight(400),
            },
            bolder: false,
            monospace: false,
            base_font_size: Size::pt(11.0),
            font_scale: 1.0,
            word_spacing_scale: 0.25,
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@ -62,15 +62,17 @@ pub enum Node {
    /// Whitespace containing less than two newlines.
    Space,
    /// Whitespace with two or more newlines.
-    Newline,
+    Parbreak,
    /// A forced line break.
    Linebreak,
    /// Plain text.
    Text(String),
    /// Lines of raw text.
    Raw(Vec<String>),
    /// Italics were enabled / disabled.
    ToggleItalic,
    /// Bolder was enabled / disabled.
    ToggleBolder,
    /// Monospace was enabled / disabled.
    ToggleMonospace,
    /// A submodel, typically a function invocation.
    Model(Box<dyn Model>),
 }
@ -80,11 +82,12 @@ impl PartialEq for Node {
        use Node::*;
        match (self, other) {
            (Space, Space) => true,
-            (Newline, Newline) => true,
+            (Parbreak, Parbreak) => true,
            (Linebreak, Linebreak) => true,
            (Text(a), Text(b)) => a == b,
            (Raw(a), Raw(b)) => a == b,
            (ToggleItalic, ToggleItalic) => true,
            (ToggleBolder, ToggleBolder) => true,
            (ToggleMonospace, ToggleMonospace) => true,
            (Model(a), Model(b)) => a == b,
            _ => false,
        }
@ -107,6 +110,7 @@ pub enum Decoration {
    ///  ^^^^^^
    /// ```
    InvalidFuncName,
    /// A key of a keyword argument:
    /// ```typst
    /// [box: width=5cm]
@ -119,12 +123,11 @@ pub enum Decoration {
    ///                 ^^^^       ^^^^^
    /// ```
    ObjectKey,
    /// An italic word.
    Italic,
    /// A bold word.
    Bold,
    /// A monospace word.
    Monospace,
 }
 impl dyn Model {
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@ -33,10 +33,12 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass<SyntaxModel>
        let span = token.span;
        let node = match token.v {
            Token::LineComment(_) | Token::BlockComment(_) => continue,
            // Only at least two newlines mean a _real_ newline indicating a
            // paragraph break.
            Token::Space(newlines) => if newlines >= 2 {
-                Node::Newline
+                Node::Parbreak
            } else {
                Node::Space
            },
@ -55,10 +57,18 @@ pub fn parse(start: Position, src: &str, ctx: ParseContext) -> Pass<SyntaxModel>
            Token::Star       => Node::ToggleBolder,
            Token::Underscore => Node::ToggleItalic,
-            Token::Backtick   => Node::ToggleMonospace,
+            Token::Backslash  => Node::Linebreak,
            Token::Text(text) => Node::Text(text.to_string()),
-            Token::LineComment(_) | Token::BlockComment(_) => continue,
+            Token::Raw { raw, terminated } => {
                if !terminated {
                    feedback.errors.push(err!(Span::at(span.end);
                        "expected backtick"));
                }
                Node::Raw(unescape_raw(raw))
            }
            Token::Text(text) => Node::Text(text.to_string()),
            other => {
                feedback.errors.push(err!(span; "unexpected {}", other.name()));
@ -219,7 +229,7 @@ impl<'s> FuncParser<'s> {
                    self.expected_at("quote", first.span.end);
                }
-                take!(Expr::Str(unescape(string)))
+                take!(Expr::Str(unescape_string(string)))
            }
            Token::ExprNumber(n) => take!(Expr::Number(n)),
@ -433,36 +443,57 @@ impl<'s> FuncParser<'s> {
    }
 }
-/// Unescape a string.
+/// Unescape a string: `the string is \"this\"` => `the string is "this"`.
-fn unescape(string: &str) -> String {
+fn unescape_string(string: &str) -> String {
    let mut s = String::with_capacity(string.len());
-    let mut escaped = false;
+    let mut iter = string.chars();
-    for c in string.chars() {
+    while let Some(c) = iter.next() {
        if c == '\\' {
-            if escaped {
+            match iter.next() {
-                s.push('\\');
+                Some('\\') => s.push('\\'),
-            }
+                Some('"') => s.push('"'),
-            escaped = !escaped;
+                Some('n') => s.push('\n'),
-        } else {
+                Some('t') => s.push('\t'),
-            if escaped {
+                Some(c) => { s.push('\\'); s.push(c); }
-                match c {
+                None => s.push('\\'),
                    '"' => s.push('"'),
                    'n' => s.push('\n'),
                    't' => s.push('\t'),
                    c => { s.push('\\'); s.push(c); }
            }
        } else {
            s.push(c);
        }
            escaped = false;
        }
    }
    s
 }
 /// Unescape raw markup into lines.
 ///
 /// An escaped backtick (a backslash directly followed by a backtick) becomes
 /// a plain backtick. Every other backslash is kept literally, together with
 /// the character that follows it. The text is split at every character for
 /// which `is_newline_char` holds, with `\r\n` counting as a single break, so
 /// the result always contains at least one (possibly empty) line.
 ///
 /// A newline that directly follows a backslash still ends the current line:
 /// the backslash stays at the end of that line and the newline is not copied
 /// into the output.
 fn unescape_raw(raw: &str) -> Vec<String> {
    let mut lines = Vec::new();
    let mut s = String::new();
    let mut iter = raw.chars().peekable();

    while let Some(c) = iter.next() {
        // Resolve the escape first. The character after a non-backtick
        // backslash is handed on to the newline check below instead of being
        // pushed blindly, so that it cannot smuggle a line break into `s`.
        let c = if c == '\\' {
            match iter.next() {
                Some('`') => {
                    s.push('`');
                    continue;
                }
                Some(next) => {
                    s.push('\\');
                    next
                }
                None => {
                    // Trailing backslash: keep it and stop.
                    s.push('\\');
                    break;
                }
            }
        } else {
            c
        };

        if is_newline_char(c) {
            // Treat a Windows line ending as one break.
            if c == '\r' && iter.peek() == Some(&'\n') {
                iter.next();
            }
            lines.push(std::mem::replace(&mut s, String::new()));
        } else {
            s.push(c);
        }
    }

    lines.push(s);
    lines
 }
 #[cfg(test)]
 #[allow(non_snake_case)]
@ -474,8 +505,8 @@ mod tests {
    use Decoration::*;
    use Node::{
-        Space as S, Newline as N,
+        Space as S, ToggleItalic as Italic, ToggleBolder as Bold,
-        ToggleItalic as Italic, ToggleBolder as Bold, ToggleMonospace as Mono,
+        Parbreak, Linebreak,
    };
    use Expr::{Number as Num, Size as Sz, Bool};
@ -484,6 +515,13 @@ mod tests {
    fn Pt(points: f32) -> Expr { Expr::Size(Size::pt(points)) }
    fn T(text: &str) -> Node { Node::Text(text.to_string()) }
    /// Create a raw text node.
    ///
    /// Takes a comma-separated list of line expressions (a trailing comma is
    /// allowed), converts each one with `to_string` and wraps the resulting
    /// vector in `Node::Raw`. `raw![]` yields a node with no lines.
    macro_rules! raw {
        ($($line:expr),* $(,)?) => {
            Node::Raw(vec![$($line.to_string()),*])
        };
    }
    /// Create a tuple expression.
    macro_rules! tuple {
        ($($items:expr),* $(,)?) => {
@ -568,7 +606,7 @@ mod tests {
    #[test]
    fn unescape_strings() {
        fn test(string: &str, expected: &str) {
-            assert_eq!(unescape(string), expected.to_string());
+            assert_eq!(unescape_string(string), expected.to_string());
        }
        test(r#"hello world"#,  "hello world");
@ -577,24 +615,49 @@ mod tests {
        test(r#"a\\"#,          "a\\");
        test(r#"a\\\nbc"#,      "a\\\nbc");
        test(r#"a\tbc"#,        "a\tbc");
-        test("🌎",              "🌎");
+        test(r"🌎",             "🌎");
        test(r"🌎\",            r"🌎\");
        test(r"\🌎",            r"\🌎");
    }
    #[test]
-    fn parse_flat_nodes() {
+    fn unescape_raws() {
        fn test(raw: &str, expected: Node) {
            let vec = if let Node::Raw(v) = expected { v } else { panic!() };
            assert_eq!(unescape_raw(raw), vec);
        }
        test("raw\\`",     raw!["raw`"]);
        test("raw\ntext",  raw!["raw", "text"]);
        test("a\r\nb",     raw!["a", "b"]);
        test("a\n\nb",     raw!["a", "", "b"]);
        test("a\r\x0Bb",   raw!["a", "", "b"]);
        test("a\r\n\r\nb", raw!["a", "", "b"]);
        test("raw\\a",     raw!["raw\\a"]);
        test("raw\\",      raw!["raw\\"]);
    }
    #[test]
    fn parse_basic_nodes() {
        // Basic nodes
        p!(""                     => []);
        p!("hi"                   => [T("hi")]);
        p!("*hi"                  => [Bold, T("hi")]);
        p!("hi_"                  => [T("hi"), Italic]);
        p!("`py`"                 => [Mono, T("py"), Mono]);
        p!("hi you"               => [T("hi"), S, T("you")]);
        p!("hi// you\nw"          => [T("hi"), S, T("w")]);
-        p!("\n\n\nhello"          => [N, T("hello")]);
+        p!("\n\n\nhello"          => [Parbreak, T("hello")]);
        p!("first//\n//\nsecond"  => [T("first"), S, S, T("second")]);
-        p!("first//\n \nsecond"   => [T("first"), N, T("second")]);
+        p!("first//\n \nsecond"   => [T("first"), Parbreak, T("second")]);
        p!("first/*\n \n*/second" => [T("first"), T("second")]);
-        p!("💜\n\n 🌍"            => [T("💜"), N, T("🌍")]);
+        p!(r"a\ b"                => [T("a"), Linebreak, S, T("b")]);
        p!("💜\n\n 🌍"            => [T("💜"), Parbreak, T("🌍")]);
        // Raw markup
        p!("`py`"         => [raw!["py"]]);
        p!("[val][`hi]`]" => [func!("val"; [raw!["hi]"]])]);
        p!("`hi\nyou"     => [raw!["hi", "you"]], [(1:3, 1:3, "expected backtick")]);
        p!("`hi\\`du`"    => [raw!["hi`du"]]);
        // Spanned nodes
        p!("Hi"      => [(0:0, 0:2, T("Hi"))]);
@ -924,7 +987,7 @@ mod tests {
        // Newline before function
        p!(" \n\r\n[val]" =>
-            [(0:0, 2:0, N), (2:0, 2:5, func!((0:1, 0:4, "val")))], [],
+            [(0:0, 2:0, Parbreak), (2:0, 2:5, func!((0:1, 0:4, "val")))], [],
            [(2:1, 2:4, ValidFuncName)],
        );
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@ -83,8 +83,17 @@ pub enum Token<'s> {
    Star,
    /// An underscore in body-text.
    Underscore,
-    /// A backtick in body-text.
+
-    Backtick,
+    /// A backslash followed by whitespace in text.
    Backslash,
    /// Raw text.
    Raw {
        /// The raw text (not yet unescaped as for strings).
        raw: &'s str,
        /// Whether the closing backtick was present.
        terminated: bool,
    },
    /// Any other consecutive string.
    Text(&'s str),
@ -115,8 +124,9 @@ impl<'s> Token<'s> {
            ExprBool(_)     => "bool",
            Star            => "star",
            Underscore      => "underscore",
-            Backtick        => "backtick",
+            Backslash       => "backslash",
-            Text(_)         => "invalid identifier",
+            Raw { .. }      => "raw text",
            Text(_)         => "text",
            Invalid("]")    => "closing bracket",
            Invalid("*/")   => "end of block comment",
            Invalid(_)      => "invalid token",
@ -206,7 +216,7 @@ impl<'s> Iterator for Tokens<'s> {
            // Style toggles.
            '*' if self.mode == Body => Star,
            '_' if self.mode == Body => Underscore,
-            '`' if self.mode == Body => Backtick,
+            '`' if self.mode == Body => self.parse_raw(),
            // An escaped thing.
            '\\' if self.mode == Body => self.parse_escaped(),
@ -281,7 +291,7 @@ impl<'s> Tokens<'s> {
    }
    fn parse_function(&mut self, start: Position) -> Token<'s> {
-        let (header, terminated) = self.read_function_part();
+        let (header, terminated) = self.read_function_part(Header);
        self.eat();
        if self.peek() != Some('[') {
@ -291,7 +301,7 @@ impl<'s> Tokens<'s> {
        self.eat();
        let body_start = self.pos() - start;
-        let (body, terminated) = self.read_function_part();
+        let (body, terminated) = self.read_function_part(Body);
        let body_end = self.pos() - start;
        let span = Span::new(body_start, body_end);
@ -300,60 +310,73 @@ impl<'s> Tokens<'s> {
        Function { header, body: Some(Spanned { v: body, span }), terminated }
    }
-    fn read_function_part(&mut self) -> (&'s str, bool) {
+    fn read_function_part(&mut self, mode: TokenizationMode) -> (&'s str, bool) {
-        let mut escaped = false;
+        let start = self.index();
-        let mut in_string = false;
+        let mut terminated = false;
        let mut depth = 0;
-        self.read_string_until(|n| {
+        while let Some(n) = self.peek() {
            if n == ']' {
                terminated = true;
                break;
            }
            self.eat();
            match n {
-                '"' if !escaped => in_string = !in_string,
+                '[' => { self.parse_function(Position::ZERO); }
-                '[' if !escaped && !in_string => depth += 1,
+                '/' if self.peek() == Some('/') => { self.parse_line_comment(); }
-                ']' if !escaped && !in_string => {
+                '/' if self.peek() == Some('*') => { self.parse_block_comment(); }
-                    if depth == 0 {
+                '"' if mode == Header => { self.parse_string(); }
-                        return true;
+                '`' if mode == Body => { self.parse_raw(); }
-                    } else {
+                '\\' => { self.eat(); }
-                        depth -= 1;
+                _ => {}
            }
        }
                '\\' => escaped = !escaped,
                _ => escaped = false,
            }
-            false
+        let end = self.index();
-        }, false, 0, 0)
+        (&self.src[start .. end], terminated)
    }
    /// Tokenizes a string expression.
    ///
    /// Reads up to the next unescaped double quote via
    /// `read_until_unescaped` and wraps the text and the `terminated` flag
    /// returned by that helper in an `ExprStr` token.
    fn parse_string(&mut self) -> Token<'s> {
        let (contents, closed) = self.read_until_unescaped('"');
        ExprStr {
            string: contents,
            terminated: closed,
        }
    }
    /// Tokenizes raw text.
    ///
    /// Reads up to the next unescaped backtick via `read_until_unescaped`
    /// and wraps the still-escaped text and the `terminated` flag returned by
    /// that helper in a `Raw` token.
    fn parse_raw(&mut self) -> Token<'s> {
        let (contents, closed) = self.read_until_unescaped('`');
        Raw {
            raw: contents,
            terminated: closed,
        }
    }
    fn read_until_unescaped(&mut self, c: char) -> (&'s str, bool) {
        let mut escaped = false;
-        let (string, terminated) = self.read_string_until(|n| {
+        self.read_string_until(|n| {
            match n {
-                '"' if !escaped => return true,
+                n if n == c && !escaped => return true,
                '\\' => escaped = !escaped,
                _ => escaped = false,
            }
            false
-        }, true, 0, -1);
+        }, true, 0, -1)
        ExprStr { string, terminated }
    }
    fn parse_escaped(&mut self) -> Token<'s> {
        fn is_escapable(c: char) -> bool {
            match c {
-                '[' | ']' | '\\' | '/' | '*' | '_' | '`' => true,
+                '[' | ']' | '\\' | '/' | '*' | '_' | '`' | '"' => true,
                _ => false,
            }
        }
-        Text(match self.peek() {
+        match self.peek() {
            Some(c) if is_escapable(c) => {
                let index = self.index();
                self.eat();
-                &self.src[index .. index + c.len_utf8()]
+                Text(&self.src[index .. index + c.len_utf8()])
            }
            Some(c) if c.is_whitespace() => Backslash,
            Some(_) => Text("\\"),
            None => Backslash,
        }
            _ => "\\"
        })
    }
    fn parse_expr(&mut self, text: &'s str) -> Token<'s> {
@ -462,6 +485,7 @@ pub fn is_identifier(string: &str) -> bool {
    true
 }
 #[cfg(test)]
 mod tests {
    use super::super::test::check;
@ -483,6 +507,11 @@ mod tests {
        Token::ExprStr { string, terminated }
    }
    /// Shorthand for building a `Token::Raw` from its text and its
    /// `terminated` flag. Named like the variant, hence the lint exception.
    #[allow(non_snake_case)]
    fn Raw(raw: &'static str, terminated: bool) -> Token<'static> {
        Token::Raw { raw, terminated }
    }
    /// Test whether the given string tokenizes into the given list of tokens.
    macro_rules! t {
        ($mode:expr, $source:expr => [$($tokens:tt)*]) => {
@ -540,10 +569,15 @@ mod tests {
    #[test]
    fn tokenize_body_only_tokens() {
-        t!(Body, "_*`"           => [Underscore, Star, Backtick]);
+        t!(Body, "_*"            => [Underscore, Star]);
        t!(Body, "***"           => [Star, Star, Star]);
        t!(Body, "[func]*bold*"  => [func!("func", None, true), Star, T("bold"), Star]);
        t!(Body, "hi_you_ there" => [T("hi"), Underscore, T("you"), Underscore, S(0), T("there")]);
        t!(Body, "`raw`"         => [Raw("raw", true)]);
        t!(Body, "`[func]`"      => [Raw("[func]", true)]);
        t!(Body, "`]"            => [Raw("]", false)]);
        t!(Body, "`\\``"         => [Raw("\\`", true)]);
        t!(Body, "\\ "           => [Backslash, S(0)]);
        t!(Header, "_*`"         => [Invalid("_*`")]);
    }
@ -598,6 +632,36 @@ mod tests {
        t!(Header, "]"            => [Invalid("]")]);
    }
    /// Checks where the tokenizer ends a function token when the header or
    /// body contains strings, backticks (raw text), comments or escaped
    /// brackets, including the unterminated variants of each.
    #[test]
    fn tokenize_correct_end_of_function() {
        // End of function with strings and backticks in headers
        t!(Body, r#"[f: "]"#      => [func!(r#"f: "]"#, None, false)]);
        t!(Body, "[f: \"s\"]"     => [func!("f: \"s\"", None, true)]);
        t!(Body, r#"[f: \"\"\"]"# => [func!(r#"f: \"\"\""#, None, true)]);
        t!(Body, "[f: `]"         => [func!("f: `", None, true)]);
        // End of function with strings and backticks in bodies
        t!(Body, "[f][\"]"        => [func!("f", Some((0:4, 0:5, "\"")), true)]);
        t!(Body, r#"[f][\"]"#     => [func!("f", Some((0:4, 0:6, r#"\""#)), true)]);
        t!(Body, "[f][`]"         => [func!("f", Some((0:4, 0:6, "`]")), false)]);
        t!(Body, "[f][\\`]"       => [func!("f", Some((0:4, 0:6, "\\`")), true)]);
        t!(Body, "[f][`raw`]"     => [func!("f", Some((0:4, 0:9, "`raw`")), true)]);
        t!(Body, "[f][`raw]"      => [func!("f", Some((0:4, 0:9, "`raw]")), false)]);
        t!(Body, "[f][`raw]`]"    => [func!("f", Some((0:4, 0:10, "`raw]`")), true)]);
        t!(Body, "[f][`\\`]"      => [func!("f", Some((0:4, 0:8, "`\\`]")), false)]);
        t!(Body, "[f][`\\\\`]"    => [func!("f", Some((0:4, 0:8, "`\\\\`")), true)]);
        // End of function with comments
        t!(Body, "[f][/*]"        => [func!("f", Some((0:4, 0:7, "/*]")), false)]);
        t!(Body, "[f][/*`*/]"     => [func!("f", Some((0:4, 0:9, "/*`*/")), true)]);
        t!(Body, "[f: //]\n]"     => [func!("f: //]\n", None, true)]);
        t!(Body, "[f: \"//]\n]"   => [func!("f: \"//]\n]", None, false)]);
        // End of function with escaped brackets
        t!(Body, "[f][\\]]"       => [func!("f", Some((0:4, 0:6, "\\]")), true)]);
        t!(Body, "[f][\\[]"       => [func!("f", Some((0:4, 0:6, "\\[")), true)]);
    }
    #[test]
    fn tokenize_escaped_symbols() {
        t!(Body, r"\\"   => [T(r"\")]);
@ -607,6 +671,7 @@ mod tests {
        t!(Body, r"\_"   => [T("_")]);
        t!(Body, r"\`"   => [T("`")]);
        t!(Body, r"\/"   => [T("/")]);
        t!(Body, r#"\""# => [T("\"")]);
    }
    #[test]