From f3bdc9d3daca2c26c4cc745be48a5ce1c8a10641 Mon Sep 17 00:00:00 2001 From: Laurenz Date: Tue, 30 Nov 2021 18:55:50 +0100 Subject: [PATCH] Syntax highlighting --- src/library/page.rs | 16 +-- src/parse/mod.rs | 2 +- src/parse/tokens.rs | 38 +++---- src/source.rs | 52 ++++++++- src/syntax/ast.rs | 59 +++++----- src/syntax/highlight.rs | 231 ++++++++++++++++++++++++++++++++++++++++ src/syntax/mod.rs | 6 +- 7 files changed, 345 insertions(+), 59 deletions(-) create mode 100644 src/syntax/highlight.rs diff --git a/src/library/page.rs b/src/library/page.rs index 0289401ae..0d29ddb67 100644 --- a/src/library/page.rs +++ b/src/library/page.rs @@ -10,8 +10,8 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult { } let paper = args.named::("paper")?.or_else(|| args.find()); - let width = args.named("width")?; - let height = args.named("height")?; + let width = args.named::>("width")?; + let height = args.named::>("height")?; let flip = args.named("flip")?; let margins = args.named("margins")?; let left = args.named("left")?; @@ -30,16 +30,16 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult { if let Some(width) = width { page.class = PaperClass::Custom; - page.size.x = width; - } - - if flip.unwrap_or(false) { - std::mem::swap(&mut page.size.x, &mut page.size.y); + page.size.x = width.unwrap_or(Length::inf()); } if let Some(height) = height { page.class = PaperClass::Custom; - page.size.y = height; + page.size.y = height.unwrap_or(Length::inf()); + } + + if flip.unwrap_or(false) { + std::mem::swap(&mut page.size.x, &mut page.size.y); } if let Some(margins) = margins { diff --git a/src/parse/mod.rs b/src/parse/mod.rs index f9c0049f0..dbec0a5e7 100644 --- a/src/parse/mod.rs +++ b/src/parse/mod.rs @@ -94,7 +94,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) { | NodeKind::Linebreak | NodeKind::Raw(_) | NodeKind::Math(_) - | NodeKind::UnicodeEscape(_) => { + | NodeKind::Escape(_) => { p.eat(); } diff --git a/src/parse/tokens.rs 
b/src/parse/tokens.rs index f80d345ed..07a6fe125 100644 --- a/src/parse/tokens.rs +++ b/src/parse/tokens.rs @@ -232,7 +232,7 @@ impl<'s> Tokens<'s> { // Markup. '*' | '_' | '=' | '~' | '`' | '$' => { self.s.eat_assert(c); - NodeKind::Text(c.into()) + NodeKind::Escape(c) } 'u' if self.s.rest().starts_with("u{") => { self.s.eat_assert('u'); @@ -240,7 +240,7 @@ impl<'s> Tokens<'s> { let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); if self.s.eat_if('}') { if let Some(c) = resolve_hex(sequence) { - NodeKind::UnicodeEscape(c) + NodeKind::Escape(c) } else { NodeKind::Error( ErrorPos::Full, @@ -554,10 +554,6 @@ mod tests { use Option::None; use TokenMode::{Code, Markup}; - fn UnicodeEscape(c: char) -> NodeKind { - NodeKind::UnicodeEscape(c) - } - fn Error(pos: ErrorPos, message: &str) -> NodeKind { NodeKind::Error(pos, message.into()) } @@ -641,7 +637,7 @@ mod tests { ('/', None, "/**/", BlockComment), ('/', Some(Markup), "*", Strong), ('/', Some(Markup), "$ $", Math(" ", false)), - ('/', Some(Markup), r"\\", Text("\\")), + ('/', Some(Markup), r"\\", Escape('\\')), ('/', Some(Markup), "#let", Let), ('/', Some(Code), "(", LeftParen), ('/', Some(Code), ":", Colon), @@ -741,19 +737,19 @@ mod tests { #[test] fn test_tokenize_escape_sequences() { // Test escapable symbols. 
- t!(Markup: r"\\" => Text(r"\")); - t!(Markup: r"\/" => Text("/")); - t!(Markup: r"\[" => Text("[")); - t!(Markup: r"\]" => Text("]")); - t!(Markup: r"\{" => Text("{")); - t!(Markup: r"\}" => Text("}")); - t!(Markup: r"\*" => Text("*")); - t!(Markup: r"\_" => Text("_")); - t!(Markup: r"\=" => Text("=")); - t!(Markup: r"\~" => Text("~")); - t!(Markup: r"\`" => Text("`")); - t!(Markup: r"\$" => Text("$")); - t!(Markup: r"\#" => Text("#")); + t!(Markup: r"\\" => Escape('\\')); + t!(Markup: r"\/" => Escape('/')); + t!(Markup: r"\[" => Escape('[')); + t!(Markup: r"\]" => Escape(']')); + t!(Markup: r"\{" => Escape('{')); + t!(Markup: r"\}" => Escape('}')); + t!(Markup: r"\*" => Escape('*')); + t!(Markup: r"\_" => Escape('_')); + t!(Markup: r"\=" => Escape('=')); + t!(Markup: r"\~" => Escape('~')); + t!(Markup: r"\`" => Escape('`')); + t!(Markup: r"\$" => Escape('$')); + t!(Markup: r"\#" => Escape('#')); // Test unescapable symbols. t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a")); @@ -763,7 +759,7 @@ mod tests { // Test basic unicode escapes. t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); - t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); + t!(Markup: r"\u{2603}" => Escape('☃')); t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); // Test unclosed unicode escapes. diff --git a/src/source.rs b/src/source.rs index 74fa8d55b..509b0a76b 100644 --- a/src/source.rs +++ b/src/source.rs @@ -12,7 +12,7 @@ use crate::diag::TypResult; use crate::loading::{FileHash, Loader}; use crate::parse::{is_newline, parse, Scanner}; use crate::syntax::ast::Markup; -use crate::syntax::{GreenNode, RedNode}; +use crate::syntax::{self, Category, GreenNode, RedNode}; use crate::util::PathExt; #[cfg(feature = "codespan-reporting")] @@ -190,6 +190,11 @@ impl SourceFile { self.line_starts.len() } + /// Return the index of the UTF-16 code unit at the byte index. + pub fn byte_to_utf16(&self, byte_idx: usize) -> Option { + Some(self.src.get(.. 
byte_idx)?.chars().map(char::len_utf16).sum()) + } + /// Return the index of the line that contains the given byte index. pub fn byte_to_line(&self, byte_idx: usize) -> Option { (byte_idx <= self.src.len()).then(|| { @@ -211,6 +216,18 @@ impl SourceFile { Some(head.chars().count()) } + /// Return the byte index corresponding to the given UTF-16 code unit index. + pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option { + let mut k = 0; + for (i, c) in self.src.char_indices() { + if k >= utf16_idx { + return Some(i); + } + k += c.len_utf16(); + } + (k == utf16_idx).then(|| self.src.len()) + } + /// Return the byte position at which the given line starts. pub fn line_to_byte(&self, line_idx: usize) -> Option { self.line_starts.get(line_idx).copied() @@ -260,6 +277,18 @@ impl SourceFile { // Recalculate the line starts after the edit. self.line_starts .extend(newlines(&self.src[start ..]).map(|idx| start + idx)); + + // Reparse. + self.root = parse(&self.src); + } + + /// Provide highlighting categories for the given range of the source file. 
+ pub fn highlight(&self, range: Range, mut f: F) + where + F: FnMut(Range, Category), + { + let red = RedNode::from_root(self.root.clone(), self.id); + syntax::highlight(red.as_ref(), range, &mut f) } } @@ -373,6 +402,27 @@ mod tests { assert_eq!(source.byte_to_column(12), Some(2)); } + #[test] + fn test_source_file_utf16() { + #[track_caller] + fn roundtrip(source: &SourceFile, byte_idx: usize, utf16_idx: usize) { + let middle = source.byte_to_utf16(byte_idx).unwrap(); + let result = source.utf16_to_byte(middle).unwrap(); + assert_eq!(middle, utf16_idx); + assert_eq!(result, byte_idx); + } + + let source = SourceFile::detached(TEST); + roundtrip(&source, 0, 0); + roundtrip(&source, 2, 1); + roundtrip(&source, 3, 2); + roundtrip(&source, 8, 7); + roundtrip(&source, 12, 9); + roundtrip(&source, 21, 18); + assert_eq!(source.byte_to_utf16(22), None); + assert_eq!(source.utf16_to_byte(19), None); + } + #[test] fn test_source_file_roundtrip() { #[track_caller] diff --git a/src/syntax/ast.rs b/src/syntax/ast.rs index 0849dd580..4d698b5ee 100644 --- a/src/syntax/ast.rs +++ b/src/syntax/ast.rs @@ -64,7 +64,7 @@ impl Markup { NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), - NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), + NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), @@ -581,39 +581,46 @@ impl BinOp { /// The precedence of this operator. 
pub fn precedence(self) -> usize { match self { - Self::Mul | Self::Div => 6, - Self::Add | Self::Sub => 5, - Self::Eq | Self::Neq | Self::Lt | Self::Leq | Self::Gt | Self::Geq => 4, + Self::Mul => 6, + Self::Div => 6, + Self::Add => 5, + Self::Sub => 5, + Self::Eq => 4, + Self::Neq => 4, + Self::Lt => 4, + Self::Leq => 4, + Self::Gt => 4, + Self::Geq => 4, Self::And => 3, Self::Or => 2, - Self::Assign - | Self::AddAssign - | Self::SubAssign - | Self::MulAssign - | Self::DivAssign => 1, + Self::Assign => 1, + Self::AddAssign => 1, + Self::SubAssign => 1, + Self::MulAssign => 1, + Self::DivAssign => 1, } } /// The associativity of this operator. pub fn associativity(self) -> Associativity { match self { - Self::Add - | Self::Sub - | Self::Mul - | Self::Div - | Self::And - | Self::Or - | Self::Eq - | Self::Neq - | Self::Lt - | Self::Leq - | Self::Gt - | Self::Geq => Associativity::Left, - Self::Assign - | Self::AddAssign - | Self::SubAssign - | Self::MulAssign - | Self::DivAssign => Associativity::Right, + Self::Add => Associativity::Left, + Self::Sub => Associativity::Left, + Self::Mul => Associativity::Left, + Self::Div => Associativity::Left, + Self::And => Associativity::Left, + Self::Or => Associativity::Left, + Self::Eq => Associativity::Left, + Self::Neq => Associativity::Left, + Self::Lt => Associativity::Left, + Self::Leq => Associativity::Left, + Self::Gt => Associativity::Left, + Self::Geq => Associativity::Left, + Self::Assign => Associativity::Right, + Self::AddAssign => Associativity::Right, + Self::SubAssign => Associativity::Right, + Self::MulAssign => Associativity::Right, + Self::DivAssign => Associativity::Right, } } diff --git a/src/syntax/highlight.rs b/src/syntax/highlight.rs new file mode 100644 index 000000000..22e6cf508 --- /dev/null +++ b/src/syntax/highlight.rs @@ -0,0 +1,231 @@ +use std::ops::Range; + +use super::{NodeKind, RedRef}; + +/// Provide highlighting categories for the children of a node that fall into a +/// range. 
+pub fn highlight(node: RedRef, range: Range, f: &mut F) +where + F: FnMut(Range, Category), +{ + for child in node.children() { + let span = child.span(); + if range.start <= span.end && range.end >= span.start { + if let Some(category) = Category::determine(child, node) { + f(span.to_range(), category); + } + highlight(child, range.clone(), f); + } + } +} + +/// The syntax highlighting category of a node. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)] +pub enum Category { + /// Any kind of bracket, parenthesis or brace. + Bracket, + /// Punctuation in code. + Punctuation, + /// A line or block comment. + Comment, + /// Strong text. + Strong, + /// Emphasized text. + Emph, + /// Raw text or code. + Raw, + /// A math formula. + Math, + /// A section heading. + Heading, + /// A list or enumeration. + List, + /// An easily typable shortcut to a unicode codepoint. + Shortcut, + /// An escape sequence. + Escape, + /// A keyword. + Keyword, + /// An operator symbol. + Operator, + /// The none literal. + None, + /// The auto literal. + Auto, + /// A boolean literal. + Bool, + /// A numeric literal. + Number, + /// A string literal. + String, + /// A function. + Function, + /// A variable. + Variable, + /// An invalid node. + Invalid, +} + +impl Category { + /// Determine the highlighting category of a node given its parent. 
+ pub fn determine(child: RedRef, parent: RedRef) -> Option { + match child.kind() { + NodeKind::LeftBracket => Some(Category::Bracket), + NodeKind::RightBracket => Some(Category::Bracket), + NodeKind::LeftBrace => Some(Category::Bracket), + NodeKind::RightBrace => Some(Category::Bracket), + NodeKind::LeftParen => Some(Category::Bracket), + NodeKind::RightParen => Some(Category::Bracket), + NodeKind::Comma => Some(Category::Punctuation), + NodeKind::Semicolon => Some(Category::Punctuation), + NodeKind::Colon => Some(Category::Punctuation), + NodeKind::LineComment => Some(Category::Comment), + NodeKind::BlockComment => Some(Category::Comment), + NodeKind::Strong => Some(Category::Strong), + NodeKind::Emph => Some(Category::Emph), + NodeKind::Raw(_) => Some(Category::Raw), + NodeKind::Math(_) => Some(Category::Math), + NodeKind::Heading => Some(Category::Heading), + NodeKind::Minus => match parent.kind() { + NodeKind::List => Some(Category::List), + _ => Some(Category::Operator), + }, + NodeKind::EnumNumbering(_) => Some(Category::List), + NodeKind::Linebreak => Some(Category::Shortcut), + NodeKind::NonBreakingSpace => Some(Category::Shortcut), + NodeKind::EnDash => Some(Category::Shortcut), + NodeKind::EmDash => Some(Category::Shortcut), + NodeKind::Escape(_) => Some(Category::Escape), + NodeKind::Let => Some(Category::Keyword), + NodeKind::If => Some(Category::Keyword), + NodeKind::Else => Some(Category::Keyword), + NodeKind::For => Some(Category::Keyword), + NodeKind::In => Some(Category::Keyword), + NodeKind::While => Some(Category::Keyword), + NodeKind::Break => Some(Category::Keyword), + NodeKind::Continue => Some(Category::Keyword), + NodeKind::Return => Some(Category::Keyword), + NodeKind::Import => Some(Category::Keyword), + NodeKind::Include => Some(Category::Keyword), + NodeKind::From => Some(Category::Keyword), + NodeKind::Not => Some(Category::Keyword), + NodeKind::And => Some(Category::Keyword), + NodeKind::Or => Some(Category::Keyword), + 
NodeKind::With => Some(Category::Keyword), + NodeKind::Plus => Some(Category::Operator), + NodeKind::Star => Some(Category::Operator), + NodeKind::Slash => Some(Category::Operator), + NodeKind::PlusEq => Some(Category::Operator), + NodeKind::HyphEq => Some(Category::Operator), + NodeKind::StarEq => Some(Category::Operator), + NodeKind::SlashEq => Some(Category::Operator), + NodeKind::Eq => match parent.kind() { + NodeKind::Heading => None, + _ => Some(Category::Operator), + }, + NodeKind::EqEq => Some(Category::Operator), + NodeKind::ExclEq => Some(Category::Operator), + NodeKind::Lt => Some(Category::Operator), + NodeKind::LtEq => Some(Category::Operator), + NodeKind::Gt => Some(Category::Operator), + NodeKind::GtEq => Some(Category::Operator), + NodeKind::Dots => Some(Category::Operator), + NodeKind::Arrow => Some(Category::Operator), + NodeKind::None => Some(Category::None), + NodeKind::Auto => Some(Category::Auto), + NodeKind::Ident(_) => match parent.kind() { + NodeKind::Named => None, + NodeKind::Closure if child.span().start == parent.span().start => { + Some(Category::Function) + } + NodeKind::WithExpr => Some(Category::Function), + NodeKind::Call => Some(Category::Function), + _ => Some(Category::Variable), + }, + NodeKind::Bool(_) => Some(Category::Bool), + NodeKind::Int(_) => Some(Category::Number), + NodeKind::Float(_) => Some(Category::Number), + NodeKind::Length(_, _) => Some(Category::Number), + NodeKind::Angle(_, _) => Some(Category::Number), + NodeKind::Percentage(_) => Some(Category::Number), + NodeKind::Fraction(_) => Some(Category::Number), + NodeKind::Str(_) => Some(Category::String), + NodeKind::Error(_, _) => Some(Category::Invalid), + NodeKind::Unknown(_) => Some(Category::Invalid), + NodeKind::Markup => None, + NodeKind::Space(_) => None, + NodeKind::Parbreak => None, + NodeKind::Text(_) => None, + NodeKind::List => None, + NodeKind::Enum => None, + NodeKind::Array => None, + NodeKind::Dict => None, + NodeKind::Named => None, + 
NodeKind::Group => None, + NodeKind::Unary => None, + NodeKind::Binary => None, + NodeKind::Call => None, + NodeKind::CallArgs => None, + NodeKind::Closure => None, + NodeKind::ClosureParams => None, + NodeKind::Spread => None, + NodeKind::Template => None, + NodeKind::Block => None, + NodeKind::ForExpr => None, + NodeKind::WhileExpr => None, + NodeKind::IfExpr => None, + NodeKind::LetExpr => None, + NodeKind::WithExpr => None, + NodeKind::ForPattern => None, + NodeKind::ImportExpr => None, + NodeKind::ImportItems => None, + NodeKind::IncludeExpr => None, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::source::SourceFile; + + #[test] + fn test_highlighting() { + use Category::*; + + #[track_caller] + fn test(src: &str, goal: &[(Range, Category)]) { + let mut vec = vec![]; + let source = SourceFile::detached(src); + source.highlight(0 .. src.len(), |range, category| { + vec.push((range, category)); + }); + assert_eq!(vec, goal); + } + + test("= *AB*", &[ + (0 .. 6, Heading), + (2 .. 3, Strong), + (5 .. 6, Strong), + ]); + + test("#f(x + 1)", &[ + (0 .. 2, Function), + (2 .. 3, Bracket), + (3 .. 4, Variable), + (5 .. 6, Operator), + (7 .. 8, Number), + (8 .. 9, Bracket), + ]); + + test("#let f(x) = x", &[ + (0 .. 4, Keyword), + (5 .. 6, Function), + (6 .. 7, Bracket), + (7 .. 8, Variable), + (8 .. 9, Bracket), + (10 .. 11, Operator), + (12 .. 13, Variable), + ]); + } +} diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs index 55a438535..e9011a4df 100644 --- a/src/syntax/mod.rs +++ b/src/syntax/mod.rs @@ -1,12 +1,14 @@ //! Syntax types. pub mod ast; +mod highlight; mod pretty; mod span; use std::fmt::{self, Debug, Display, Formatter}; use std::rc::Rc; +pub use highlight::*; pub use pretty::*; pub use span::*; @@ -503,7 +505,7 @@ pub enum NodeKind { EmDash, /// A slash and the letter "u" followed by a hexadecimal unicode entity /// enclosed in curly braces: `\u{1F5FA}`. 
- UnicodeEscape(char), + Escape(char), /// Strong text was enabled / disabled: `*`. Strong, /// Emphasized text was enabled / disabled: `_`. @@ -689,7 +691,7 @@ impl NodeKind { Self::NonBreakingSpace => "non-breaking space", Self::EnDash => "en dash", Self::EmDash => "em dash", - Self::UnicodeEscape(_) => "unicode escape sequence", + Self::Escape(_) => "escape sequence", Self::Strong => "strong", Self::Emph => "emphasis", Self::Heading => "heading",