Syntax highlighting

This commit is contained in:
Laurenz 2021-11-30 18:55:50 +01:00
parent e915cc4ef0
commit f3bdc9d3da
7 changed files with 345 additions and 59 deletions

View File

@ -10,8 +10,8 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult<Value> {
} }
let paper = args.named::<Paper>("paper")?.or_else(|| args.find()); let paper = args.named::<Paper>("paper")?.or_else(|| args.find());
let width = args.named("width")?; let width = args.named::<Smart<_>>("width")?;
let height = args.named("height")?; let height = args.named::<Smart<_>>("height")?;
let flip = args.named("flip")?; let flip = args.named("flip")?;
let margins = args.named("margins")?; let margins = args.named("margins")?;
let left = args.named("left")?; let left = args.named("left")?;
@ -30,16 +30,16 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult<Value> {
if let Some(width) = width { if let Some(width) = width {
page.class = PaperClass::Custom; page.class = PaperClass::Custom;
page.size.x = width; page.size.x = width.unwrap_or(Length::inf());
}
if flip.unwrap_or(false) {
std::mem::swap(&mut page.size.x, &mut page.size.y);
} }
if let Some(height) = height { if let Some(height) = height {
page.class = PaperClass::Custom; page.class = PaperClass::Custom;
page.size.y = height; page.size.y = height.unwrap_or(Length::inf());
}
if flip.unwrap_or(false) {
std::mem::swap(&mut page.size.x, &mut page.size.y);
} }
if let Some(margins) = margins { if let Some(margins) = margins {

View File

@ -94,7 +94,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Linebreak | NodeKind::Linebreak
| NodeKind::Raw(_) | NodeKind::Raw(_)
| NodeKind::Math(_) | NodeKind::Math(_)
| NodeKind::UnicodeEscape(_) => { | NodeKind::Escape(_) => {
p.eat(); p.eat();
} }

View File

@ -232,7 +232,7 @@ impl<'s> Tokens<'s> {
// Markup. // Markup.
'*' | '_' | '=' | '~' | '`' | '$' => { '*' | '_' | '=' | '~' | '`' | '$' => {
self.s.eat_assert(c); self.s.eat_assert(c);
NodeKind::Text(c.into()) NodeKind::Escape(c)
} }
'u' if self.s.rest().starts_with("u{") => { 'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u'); self.s.eat_assert('u');
@ -240,7 +240,7 @@ impl<'s> Tokens<'s> {
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric()); let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
if self.s.eat_if('}') { if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) { if let Some(c) = resolve_hex(sequence) {
NodeKind::UnicodeEscape(c) NodeKind::Escape(c)
} else { } else {
NodeKind::Error( NodeKind::Error(
ErrorPos::Full, ErrorPos::Full,
@ -554,10 +554,6 @@ mod tests {
use Option::None; use Option::None;
use TokenMode::{Code, Markup}; use TokenMode::{Code, Markup};
fn UnicodeEscape(c: char) -> NodeKind {
NodeKind::UnicodeEscape(c)
}
fn Error(pos: ErrorPos, message: &str) -> NodeKind { fn Error(pos: ErrorPos, message: &str) -> NodeKind {
NodeKind::Error(pos, message.into()) NodeKind::Error(pos, message.into())
} }
@ -641,7 +637,7 @@ mod tests {
('/', None, "/**/", BlockComment), ('/', None, "/**/", BlockComment),
('/', Some(Markup), "*", Strong), ('/', Some(Markup), "*", Strong),
('/', Some(Markup), "$ $", Math(" ", false)), ('/', Some(Markup), "$ $", Math(" ", false)),
('/', Some(Markup), r"\\", Text("\\")), ('/', Some(Markup), r"\\", Escape('\\')),
('/', Some(Markup), "#let", Let), ('/', Some(Markup), "#let", Let),
('/', Some(Code), "(", LeftParen), ('/', Some(Code), "(", LeftParen),
('/', Some(Code), ":", Colon), ('/', Some(Code), ":", Colon),
@ -741,19 +737,19 @@ mod tests {
#[test] #[test]
fn test_tokenize_escape_sequences() { fn test_tokenize_escape_sequences() {
// Test escapable symbols. // Test escapable symbols.
t!(Markup: r"\\" => Text(r"\")); t!(Markup: r"\\" => Escape('\\'));
t!(Markup: r"\/" => Text("/")); t!(Markup: r"\/" => Escape('/'));
t!(Markup: r"\[" => Text("[")); t!(Markup: r"\[" => Escape('['));
t!(Markup: r"\]" => Text("]")); t!(Markup: r"\]" => Escape(']'));
t!(Markup: r"\{" => Text("{")); t!(Markup: r"\{" => Escape('{'));
t!(Markup: r"\}" => Text("}")); t!(Markup: r"\}" => Escape('}'));
t!(Markup: r"\*" => Text("*")); t!(Markup: r"\*" => Escape('*'));
t!(Markup: r"\_" => Text("_")); t!(Markup: r"\_" => Escape('_'));
t!(Markup: r"\=" => Text("=")); t!(Markup: r"\=" => Escape('='));
t!(Markup: r"\~" => Text("~")); t!(Markup: r"\~" => Escape('~'));
t!(Markup: r"\`" => Text("`")); t!(Markup: r"\`" => Escape('`'));
t!(Markup: r"\$" => Text("$")); t!(Markup: r"\$" => Escape('$'));
t!(Markup: r"\#" => Text("#")); t!(Markup: r"\#" => Escape('#'));
// Test unescapable symbols. // Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a")); t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
@ -763,7 +759,7 @@ mod tests {
// Test basic unicode escapes. // Test basic unicode escapes.
t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
t!(Markup: r"\u{2603}" => UnicodeEscape('☃')); t!(Markup: r"\u{2603}" => Escape('☃'));
t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence")); t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
// Test unclosed unicode escapes. // Test unclosed unicode escapes.

View File

@ -12,7 +12,7 @@ use crate::diag::TypResult;
use crate::loading::{FileHash, Loader}; use crate::loading::{FileHash, Loader};
use crate::parse::{is_newline, parse, Scanner}; use crate::parse::{is_newline, parse, Scanner};
use crate::syntax::ast::Markup; use crate::syntax::ast::Markup;
use crate::syntax::{GreenNode, RedNode}; use crate::syntax::{self, Category, GreenNode, RedNode};
use crate::util::PathExt; use crate::util::PathExt;
#[cfg(feature = "codespan-reporting")] #[cfg(feature = "codespan-reporting")]
@ -190,6 +190,11 @@ impl SourceFile {
self.line_starts.len() self.line_starts.len()
} }
/// Convert a byte index into the source text to the matching offset counted
/// in UTF-16 code units.
///
/// Yields `None` when the text cannot be sliced up to `byte_idx`, i.e. when
/// the index is out of bounds or does not lie on a character boundary.
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
    let head = self.src.get(.. byte_idx)?;
    let units: usize = head.chars().map(char::len_utf16).sum();
    Some(units)
}
/// Return the index of the line that contains the given byte index. /// Return the index of the line that contains the given byte index.
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> { pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
(byte_idx <= self.src.len()).then(|| { (byte_idx <= self.src.len()).then(|| {
@ -211,6 +216,18 @@ impl SourceFile {
Some(head.chars().count()) Some(head.chars().count())
} }
/// Return the byte index at which the UTF-16 code unit with the given index
/// starts.
///
/// This is the counterpart of `byte_to_utf16`. An index equal to the total
/// number of UTF-16 code units maps to the end of the source text; any larger
/// index yields `None`.
///
/// Note: an index that falls inside a surrogate pair resolves to the start of
/// the following character, except inside the very last character, where
/// `None` is returned.
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
    // Number of UTF-16 code units occupied by the characters seen so far.
    let mut units = 0;
    for (byte_idx, c) in self.src.char_indices() {
        if units >= utf16_idx {
            return Some(byte_idx);
        }
        units += c.len_utf16();
    }

    // Only the exact end of the text is still addressable at this point.
    (units == utf16_idx).then(|| self.src.len())
}
/// Return the byte position at which the given line starts. /// Return the byte position at which the given line starts.
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> { pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
self.line_starts.get(line_idx).copied() self.line_starts.get(line_idx).copied()
@ -260,6 +277,18 @@ impl SourceFile {
// Recalculate the line starts after the edit. // Recalculate the line starts after the edit.
self.line_starts self.line_starts
.extend(newlines(&self.src[start ..]).map(|idx| start + idx)); .extend(newlines(&self.src[start ..]).map(|idx| start + idx));
// Reparse.
self.root = parse(&self.src);
}
/// Provide highlighting categories for the given range of the source file.
pub fn highlight<F>(&self, range: Range<usize>, mut f: F)
where
F: FnMut(Range<usize>, Category),
{
let red = RedNode::from_root(self.root.clone(), self.id);
syntax::highlight(red.as_ref(), range, &mut f)
} }
} }
@ -373,6 +402,27 @@ mod tests {
assert_eq!(source.byte_to_column(12), Some(2)); assert_eq!(source.byte_to_column(12), Some(2));
} }
#[test]
fn test_source_file_utf16() {
    /// Convert `byte_idx` to a UTF-16 index and back again, checking that
    /// the intermediate value is `utf16_idx` and the final one `byte_idx`.
    #[track_caller]
    fn roundtrip(source: &SourceFile, byte_idx: usize, utf16_idx: usize) {
        let units = source.byte_to_utf16(byte_idx).unwrap();
        let bytes = source.utf16_to_byte(units).unwrap();
        assert_eq!(units, utf16_idx);
        assert_eq!(bytes, byte_idx);
    }

    let source = SourceFile::detached(TEST);

    // Pairs of (byte index, expected UTF-16 index) within `TEST`.
    roundtrip(&source, 0, 0);
    roundtrip(&source, 2, 1);
    roundtrip(&source, 3, 2);
    roundtrip(&source, 8, 7);
    roundtrip(&source, 12, 9);
    roundtrip(&source, 21, 18);

    // Both conversions must reject these indices (presumably the first ones
    // past the end of `TEST`).
    assert_eq!(source.byte_to_utf16(22), None);
    assert_eq!(source.utf16_to_byte(19), None);
}
#[test] #[test]
fn test_source_file_roundtrip() { fn test_source_file_roundtrip() {
#[track_caller] #[track_caller]

View File

@ -64,7 +64,7 @@ impl Markup {
NodeKind::Strong => Some(MarkupNode::Strong), NodeKind::Strong => Some(MarkupNode::Strong),
NodeKind::Emph => Some(MarkupNode::Emph), NodeKind::Emph => Some(MarkupNode::Emph),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())), NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())), NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())),
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())), NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())), NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())), NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())),
@ -581,39 +581,46 @@ impl BinOp {
/// The precedence of this operator. /// The precedence of this operator.
pub fn precedence(self) -> usize { pub fn precedence(self) -> usize {
match self { match self {
Self::Mul | Self::Div => 6, Self::Mul => 6,
Self::Add | Self::Sub => 5, Self::Div => 6,
Self::Eq | Self::Neq | Self::Lt | Self::Leq | Self::Gt | Self::Geq => 4, Self::Add => 5,
Self::Sub => 5,
Self::Eq => 4,
Self::Neq => 4,
Self::Lt => 4,
Self::Leq => 4,
Self::Gt => 4,
Self::Geq => 4,
Self::And => 3, Self::And => 3,
Self::Or => 2, Self::Or => 2,
Self::Assign Self::Assign => 1,
| Self::AddAssign Self::AddAssign => 1,
| Self::SubAssign Self::SubAssign => 1,
| Self::MulAssign Self::MulAssign => 1,
| Self::DivAssign => 1, Self::DivAssign => 1,
} }
} }
/// The associativity of this operator. /// The associativity of this operator.
pub fn associativity(self) -> Associativity { pub fn associativity(self) -> Associativity {
match self { match self {
Self::Add Self::Add => Associativity::Left,
| Self::Sub Self::Sub => Associativity::Left,
| Self::Mul Self::Mul => Associativity::Left,
| Self::Div Self::Div => Associativity::Left,
| Self::And Self::And => Associativity::Left,
| Self::Or Self::Or => Associativity::Left,
| Self::Eq Self::Eq => Associativity::Left,
| Self::Neq Self::Neq => Associativity::Left,
| Self::Lt Self::Lt => Associativity::Left,
| Self::Leq Self::Leq => Associativity::Left,
| Self::Gt Self::Gt => Associativity::Left,
| Self::Geq => Associativity::Left, Self::Geq => Associativity::Left,
Self::Assign Self::Assign => Associativity::Right,
| Self::AddAssign Self::AddAssign => Associativity::Right,
| Self::SubAssign Self::SubAssign => Associativity::Right,
| Self::MulAssign Self::MulAssign => Associativity::Right,
| Self::DivAssign => Associativity::Right, Self::DivAssign => Associativity::Right,
} }
} }

231
src/syntax/highlight.rs Normal file
View File

@ -0,0 +1,231 @@
use std::ops::Range;
use super::{NodeKind, RedRef};
/// Walk the descendants of `node` and report a highlighting category for each
/// one whose span touches `range`.
///
/// For every such descendant that `Category::determine` assigns a category
/// to, `f` is invoked with the node's span (as a range) and that category. A
/// node is reported before its own children are visited.
pub fn highlight<F>(node: RedRef, range: Range<usize>, f: &mut F)
where
    F: FnMut(Range<usize>, Category),
{
    for child in node.children() {
        let span = child.span();

        // Skip subtrees lying entirely outside the requested range. A span
        // that merely touches one of the range's boundaries still counts.
        if range.start > span.end || range.end < span.start {
            continue;
        }

        if let Some(category) = Category::determine(child, node) {
            f(span.to_range(), category);
        }

        highlight(child, range.clone(), f);
    }
}
/// The category a syntax node is assigned for the purpose of highlighting.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Category {
    /// Brackets, parentheses and braces of any kind.
    Bracket,
    /// Punctuation that appears in code.
    Punctuation,
    /// Line comments and block comments.
    Comment,
    /// Text marked as strong.
    Strong,
    /// Text marked as emphasized.
    Emph,
    /// Raw text or code.
    Raw,
    /// A mathematical formula.
    Math,
    /// The heading of a section.
    Heading,
    /// A list or an enumeration.
    List,
    /// A shortcut that is easy to type and stands for a unicode codepoint.
    Shortcut,
    /// An escape sequence.
    Escape,
    /// A keyword.
    Keyword,
    /// The symbol of an operator.
    Operator,
    /// The literal `none`.
    None,
    /// The literal `auto`.
    Auto,
    /// A boolean literal.
    Bool,
    /// A numeric literal.
    Number,
    /// A string literal.
    String,
    /// A function.
    Function,
    /// A variable.
    Variable,
    /// A node that is invalid.
    Invalid,
}
impl Category {
/// Determine the highlighting category of a node given its parent.
pub fn determine(child: RedRef, parent: RedRef) -> Option<Category> {
match child.kind() {
NodeKind::LeftBracket => Some(Category::Bracket),
NodeKind::RightBracket => Some(Category::Bracket),
NodeKind::LeftBrace => Some(Category::Bracket),
NodeKind::RightBrace => Some(Category::Bracket),
NodeKind::LeftParen => Some(Category::Bracket),
NodeKind::RightParen => Some(Category::Bracket),
NodeKind::Comma => Some(Category::Punctuation),
NodeKind::Semicolon => Some(Category::Punctuation),
NodeKind::Colon => Some(Category::Punctuation),
NodeKind::LineComment => Some(Category::Comment),
NodeKind::BlockComment => Some(Category::Comment),
NodeKind::Strong => Some(Category::Strong),
NodeKind::Emph => Some(Category::Emph),
NodeKind::Raw(_) => Some(Category::Raw),
NodeKind::Math(_) => Some(Category::Math),
NodeKind::Heading => Some(Category::Heading),
NodeKind::Minus => match parent.kind() {
NodeKind::List => Some(Category::List),
_ => Some(Category::Operator),
},
NodeKind::EnumNumbering(_) => Some(Category::List),
NodeKind::Linebreak => Some(Category::Shortcut),
NodeKind::NonBreakingSpace => Some(Category::Shortcut),
NodeKind::EnDash => Some(Category::Shortcut),
NodeKind::EmDash => Some(Category::Shortcut),
NodeKind::Escape(_) => Some(Category::Escape),
NodeKind::Let => Some(Category::Keyword),
NodeKind::If => Some(Category::Keyword),
NodeKind::Else => Some(Category::Keyword),
NodeKind::For => Some(Category::Keyword),
NodeKind::In => Some(Category::Keyword),
NodeKind::While => Some(Category::Keyword),
NodeKind::Break => Some(Category::Keyword),
NodeKind::Continue => Some(Category::Keyword),
NodeKind::Return => Some(Category::Keyword),
NodeKind::Import => Some(Category::Keyword),
NodeKind::Include => Some(Category::Keyword),
NodeKind::From => Some(Category::Keyword),
NodeKind::Not => Some(Category::Keyword),
NodeKind::And => Some(Category::Keyword),
NodeKind::Or => Some(Category::Keyword),
NodeKind::With => Some(Category::Keyword),
NodeKind::Plus => Some(Category::Operator),
NodeKind::Star => Some(Category::Operator),
NodeKind::Slash => Some(Category::Operator),
NodeKind::PlusEq => Some(Category::Operator),
NodeKind::HyphEq => Some(Category::Operator),
NodeKind::StarEq => Some(Category::Operator),
NodeKind::SlashEq => Some(Category::Operator),
NodeKind::Eq => match parent.kind() {
NodeKind::Heading => None,
_ => Some(Category::Operator),
},
NodeKind::EqEq => Some(Category::Operator),
NodeKind::ExclEq => Some(Category::Operator),
NodeKind::Lt => Some(Category::Operator),
NodeKind::LtEq => Some(Category::Operator),
NodeKind::Gt => Some(Category::Operator),
NodeKind::GtEq => Some(Category::Operator),
NodeKind::Dots => Some(Category::Operator),
NodeKind::Arrow => Some(Category::Operator),
NodeKind::None => Some(Category::None),
NodeKind::Auto => Some(Category::Auto),
NodeKind::Ident(_) => match parent.kind() {
NodeKind::Named => None,
NodeKind::Closure if child.span().start == parent.span().start => {
Some(Category::Function)
}
NodeKind::WithExpr => Some(Category::Function),
NodeKind::Call => Some(Category::Function),
_ => Some(Category::Variable),
},
NodeKind::Bool(_) => Some(Category::Bool),
NodeKind::Int(_) => Some(Category::Number),
NodeKind::Float(_) => Some(Category::Number),
NodeKind::Length(_, _) => Some(Category::Number),
NodeKind::Angle(_, _) => Some(Category::Number),
NodeKind::Percentage(_) => Some(Category::Number),
NodeKind::Fraction(_) => Some(Category::Number),
NodeKind::Str(_) => Some(Category::String),
NodeKind::Error(_, _) => Some(Category::Invalid),
NodeKind::Unknown(_) => Some(Category::Invalid),
NodeKind::Markup => None,
NodeKind::Space(_) => None,
NodeKind::Parbreak => None,
NodeKind::Text(_) => None,
NodeKind::List => None,
NodeKind::Enum => None,
NodeKind::Array => None,
NodeKind::Dict => None,
NodeKind::Named => None,
NodeKind::Group => None,
NodeKind::Unary => None,
NodeKind::Binary => None,
NodeKind::Call => None,
NodeKind::CallArgs => None,
NodeKind::Closure => None,
NodeKind::ClosureParams => None,
NodeKind::Spread => None,
NodeKind::Template => None,
NodeKind::Block => None,
NodeKind::ForExpr => None,
NodeKind::WhileExpr => None,
NodeKind::IfExpr => None,
NodeKind::LetExpr => None,
NodeKind::WithExpr => None,
NodeKind::ForPattern => None,
NodeKind::ImportExpr => None,
NodeKind::ImportItems => None,
NodeKind::IncludeExpr => None,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::SourceFile;

    #[test]
    fn test_highlighting() {
        use Category::*;

        /// Highlight all of `src` and compare the reported (range, category)
        /// pairs, in the order they were reported, against `goal`.
        #[track_caller]
        fn test(src: &str, goal: &[(Range<usize>, Category)]) {
            let source = SourceFile::detached(src);
            let mut found = Vec::new();
            source.highlight(0 .. src.len(), |range, category| {
                found.push((range, category));
            });
            assert_eq!(found, goal);
        }

        // A heading containing strong markers.
        test("= *AB*", &[
            (0 .. 6, Heading),
            (2 .. 3, Strong),
            (5 .. 6, Strong),
        ]);

        // A function call with a binary expression as its argument.
        test("#f(x + 1)", &[
            (0 .. 2, Function),
            (2 .. 3, Bracket),
            (3 .. 4, Variable),
            (5 .. 6, Operator),
            (7 .. 8, Number),
            (8 .. 9, Bracket),
        ]);

        // A let binding that defines a function.
        test("#let f(x) = x", &[
            (0 .. 4, Keyword),
            (5 .. 6, Function),
            (6 .. 7, Bracket),
            (7 .. 8, Variable),
            (8 .. 9, Bracket),
            (10 .. 11, Operator),
            (12 .. 13, Variable),
        ]);
    }
}

View File

@ -1,12 +1,14 @@
//! Syntax types. //! Syntax types.
pub mod ast; pub mod ast;
mod highlight;
mod pretty; mod pretty;
mod span; mod span;
use std::fmt::{self, Debug, Display, Formatter}; use std::fmt::{self, Debug, Display, Formatter};
use std::rc::Rc; use std::rc::Rc;
pub use highlight::*;
pub use pretty::*; pub use pretty::*;
pub use span::*; pub use span::*;
@ -503,7 +505,7 @@ pub enum NodeKind {
EmDash, EmDash,
/// A slash and the letter "u" followed by a hexadecimal unicode entity /// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`. /// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(char), Escape(char),
/// Strong text was enabled / disabled: `*`. /// Strong text was enabled / disabled: `*`.
Strong, Strong,
/// Emphasized text was enabled / disabled: `_`. /// Emphasized text was enabled / disabled: `_`.
@ -689,7 +691,7 @@ impl NodeKind {
Self::NonBreakingSpace => "non-breaking space", Self::NonBreakingSpace => "non-breaking space",
Self::EnDash => "en dash", Self::EnDash => "en dash",
Self::EmDash => "em dash", Self::EmDash => "em dash",
Self::UnicodeEscape(_) => "unicode escape sequence", Self::Escape(_) => "escape sequence",
Self::Strong => "strong", Self::Strong => "strong",
Self::Emph => "emphasis", Self::Emph => "emphasis",
Self::Heading => "heading", Self::Heading => "heading",