Syntax highlighting

This commit is contained in:
Laurenz 2021-11-30 18:55:50 +01:00
parent e915cc4ef0
commit f3bdc9d3da
7 changed files with 345 additions and 59 deletions

View File

@ -10,8 +10,8 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult<Value> {
}
let paper = args.named::<Paper>("paper")?.or_else(|| args.find());
let width = args.named("width")?;
let height = args.named("height")?;
let width = args.named::<Smart<_>>("width")?;
let height = args.named::<Smart<_>>("height")?;
let flip = args.named("flip")?;
let margins = args.named("margins")?;
let left = args.named("left")?;
@ -30,16 +30,16 @@ pub fn page(ctx: &mut EvalContext, args: &mut Args) -> TypResult<Value> {
if let Some(width) = width {
page.class = PaperClass::Custom;
page.size.x = width;
}
if flip.unwrap_or(false) {
std::mem::swap(&mut page.size.x, &mut page.size.y);
page.size.x = width.unwrap_or(Length::inf());
}
if let Some(height) = height {
page.class = PaperClass::Custom;
page.size.y = height;
page.size.y = height.unwrap_or(Length::inf());
}
if flip.unwrap_or(false) {
std::mem::swap(&mut page.size.x, &mut page.size.y);
}
if let Some(margins) = margins {

View File

@ -94,7 +94,7 @@ fn markup_node(p: &mut Parser, at_start: &mut bool) {
| NodeKind::Linebreak
| NodeKind::Raw(_)
| NodeKind::Math(_)
| NodeKind::UnicodeEscape(_) => {
| NodeKind::Escape(_) => {
p.eat();
}

View File

@ -232,7 +232,7 @@ impl<'s> Tokens<'s> {
// Markup.
'*' | '_' | '=' | '~' | '`' | '$' => {
self.s.eat_assert(c);
NodeKind::Text(c.into())
NodeKind::Escape(c)
}
'u' if self.s.rest().starts_with("u{") => {
self.s.eat_assert('u');
@ -240,7 +240,7 @@ impl<'s> Tokens<'s> {
let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
if self.s.eat_if('}') {
if let Some(c) = resolve_hex(sequence) {
NodeKind::UnicodeEscape(c)
NodeKind::Escape(c)
} else {
NodeKind::Error(
ErrorPos::Full,
@ -554,10 +554,6 @@ mod tests {
use Option::None;
use TokenMode::{Code, Markup};
fn UnicodeEscape(c: char) -> NodeKind {
NodeKind::UnicodeEscape(c)
}
fn Error(pos: ErrorPos, message: &str) -> NodeKind {
NodeKind::Error(pos, message.into())
}
@ -641,7 +637,7 @@ mod tests {
('/', None, "/**/", BlockComment),
('/', Some(Markup), "*", Strong),
('/', Some(Markup), "$ $", Math(" ", false)),
('/', Some(Markup), r"\\", Text("\\")),
('/', Some(Markup), r"\\", Escape('\\')),
('/', Some(Markup), "#let", Let),
('/', Some(Code), "(", LeftParen),
('/', Some(Code), ":", Colon),
@ -741,19 +737,19 @@ mod tests {
#[test]
fn test_tokenize_escape_sequences() {
// Test escapable symbols.
t!(Markup: r"\\" => Text(r"\"));
t!(Markup: r"\/" => Text("/"));
t!(Markup: r"\[" => Text("["));
t!(Markup: r"\]" => Text("]"));
t!(Markup: r"\{" => Text("{"));
t!(Markup: r"\}" => Text("}"));
t!(Markup: r"\*" => Text("*"));
t!(Markup: r"\_" => Text("_"));
t!(Markup: r"\=" => Text("="));
t!(Markup: r"\~" => Text("~"));
t!(Markup: r"\`" => Text("`"));
t!(Markup: r"\$" => Text("$"));
t!(Markup: r"\#" => Text("#"));
t!(Markup: r"\\" => Escape('\\'));
t!(Markup: r"\/" => Escape('/'));
t!(Markup: r"\[" => Escape('['));
t!(Markup: r"\]" => Escape(']'));
t!(Markup: r"\{" => Escape('{'));
t!(Markup: r"\}" => Escape('}'));
t!(Markup: r"\*" => Escape('*'));
t!(Markup: r"\_" => Escape('_'));
t!(Markup: r"\=" => Escape('='));
t!(Markup: r"\~" => Escape('~'));
t!(Markup: r"\`" => Escape('`'));
t!(Markup: r"\$" => Escape('$'));
t!(Markup: r"\#" => Escape('#'));
// Test unescapable symbols.
t!(Markup[" /"]: r"\a" => Text(r"\"), Text("a"));
@ -763,7 +759,7 @@ mod tests {
// Test basic unicode escapes.
t!(Markup: r"\u{}" => Error(Full, "invalid unicode escape sequence"));
t!(Markup: r"\u{2603}" => UnicodeEscape('☃'));
t!(Markup: r"\u{2603}" => Escape('☃'));
t!(Markup: r"\u{P}" => Error(Full, "invalid unicode escape sequence"));
// Test unclosed unicode escapes.

View File

@ -12,7 +12,7 @@ use crate::diag::TypResult;
use crate::loading::{FileHash, Loader};
use crate::parse::{is_newline, parse, Scanner};
use crate::syntax::ast::Markup;
use crate::syntax::{GreenNode, RedNode};
use crate::syntax::{self, Category, GreenNode, RedNode};
use crate::util::PathExt;
#[cfg(feature = "codespan-reporting")]
@ -190,6 +190,11 @@ impl SourceFile {
self.line_starts.len()
}
/// Convert a byte index into the source text to the corresponding UTF-16
/// code unit index.
///
/// Returns `None` when the text cannot be sliced up to `byte_idx`, for
/// example because the index lies past the end of the text.
pub fn byte_to_utf16(&self, byte_idx: usize) -> Option<usize> {
    let head = self.src.get(.. byte_idx)?;
    // Every character contributes one or two UTF-16 code units.
    Some(head.chars().map(char::len_utf16).sum())
}
/// Return the index of the line that contains the given byte index.
pub fn byte_to_line(&self, byte_idx: usize) -> Option<usize> {
(byte_idx <= self.src.len()).then(|| {
@ -211,6 +216,18 @@ impl SourceFile {
Some(head.chars().count())
}
/// Return the byte index that corresponds to the given UTF-16 code unit
/// index.
///
/// Yields `None` if `utf16_idx` lies beyond the end of the text. An index
/// that falls between the two code units of a single character is not
/// rejected: it resolves to the byte index of the next character if there is
/// one, and to `None` otherwise.
pub fn utf16_to_byte(&self, utf16_idx: usize) -> Option<usize> {
// Number of UTF-16 code units that precede the character being visited.
let mut k = 0;
for (i, c) in self.src.char_indices() {
// The first character that starts at or after the requested code unit
// determines the result.
if k >= utf16_idx {
return Some(i);
}
k += c.len_utf16();
}
// Only an index exactly one past the last code unit maps to the end of
// the text.
(k == utf16_idx).then(|| self.src.len())
}
/// Return the byte position at which the given line starts.
pub fn line_to_byte(&self, line_idx: usize) -> Option<usize> {
self.line_starts.get(line_idx).copied()
@ -260,6 +277,18 @@ impl SourceFile {
// Recalculate the line starts after the edit.
self.line_starts
.extend(newlines(&self.src[start ..]).map(|idx| start + idx));
// Reparse.
self.root = parse(&self.src);
}
/// Report syntax highlighting categories for the given range of this source
/// file.
///
/// Builds a red node from the file's root and id and hands it, together with
/// `range` and the callback `f`, to [`syntax::highlight`].
pub fn highlight<F>(&self, range: Range<usize>, mut f: F)
where
    F: FnMut(Range<usize>, Category),
{
    let root = RedNode::from_root(self.root.clone(), self.id);
    syntax::highlight(root.as_ref(), range, &mut f);
}
}
@ -373,6 +402,27 @@ mod tests {
assert_eq!(source.byte_to_column(12), Some(2));
}
#[test]
fn test_source_file_utf16() {
    /// Convert `byte_idx` to a UTF-16 index and back again, checking that
    /// the intermediate value equals `utf16_idx` and that the conversion
    /// returns to the starting byte index.
    #[track_caller]
    fn roundtrip(source: &SourceFile, byte_idx: usize, utf16_idx: usize) {
        let utf16 = source.byte_to_utf16(byte_idx).unwrap();
        let byte = source.utf16_to_byte(utf16).unwrap();
        assert_eq!(utf16, utf16_idx);
        assert_eq!(byte, byte_idx);
    }

    let source = SourceFile::detached(TEST);

    // Pairs of (byte index, expected UTF-16 index) within `TEST`.
    roundtrip(&source, 0, 0);
    roundtrip(&source, 2, 1);
    roundtrip(&source, 3, 2);
    roundtrip(&source, 8, 7);
    roundtrip(&source, 12, 9);
    roundtrip(&source, 21, 18);

    // Presumably one past the end of `TEST` in each encoding; neither index
    // has a counterpart.
    assert_eq!(source.byte_to_utf16(22), None);
    assert_eq!(source.utf16_to_byte(19), None);
}
#[test]
fn test_source_file_roundtrip() {
#[track_caller]

View File

@ -64,7 +64,7 @@ impl Markup {
NodeKind::Strong => Some(MarkupNode::Strong),
NodeKind::Emph => Some(MarkupNode::Emph),
NodeKind::Text(s) => Some(MarkupNode::Text(s.clone())),
NodeKind::UnicodeEscape(c) => Some(MarkupNode::Text((*c).into())),
NodeKind::Escape(c) => Some(MarkupNode::Text((*c).into())),
NodeKind::EnDash => Some(MarkupNode::Text('\u{2013}'.into())),
NodeKind::EmDash => Some(MarkupNode::Text('\u{2014}'.into())),
NodeKind::NonBreakingSpace => Some(MarkupNode::Text('\u{00A0}'.into())),
@ -581,39 +581,46 @@ impl BinOp {
/// The precedence of this operator.
pub fn precedence(self) -> usize {
match self {
Self::Mul | Self::Div => 6,
Self::Add | Self::Sub => 5,
Self::Eq | Self::Neq | Self::Lt | Self::Leq | Self::Gt | Self::Geq => 4,
Self::Mul => 6,
Self::Div => 6,
Self::Add => 5,
Self::Sub => 5,
Self::Eq => 4,
Self::Neq => 4,
Self::Lt => 4,
Self::Leq => 4,
Self::Gt => 4,
Self::Geq => 4,
Self::And => 3,
Self::Or => 2,
Self::Assign
| Self::AddAssign
| Self::SubAssign
| Self::MulAssign
| Self::DivAssign => 1,
Self::Assign => 1,
Self::AddAssign => 1,
Self::SubAssign => 1,
Self::MulAssign => 1,
Self::DivAssign => 1,
}
}
/// The associativity of this operator.
pub fn associativity(self) -> Associativity {
match self {
Self::Add
| Self::Sub
| Self::Mul
| Self::Div
| Self::And
| Self::Or
| Self::Eq
| Self::Neq
| Self::Lt
| Self::Leq
| Self::Gt
| Self::Geq => Associativity::Left,
Self::Assign
| Self::AddAssign
| Self::SubAssign
| Self::MulAssign
| Self::DivAssign => Associativity::Right,
Self::Add => Associativity::Left,
Self::Sub => Associativity::Left,
Self::Mul => Associativity::Left,
Self::Div => Associativity::Left,
Self::And => Associativity::Left,
Self::Or => Associativity::Left,
Self::Eq => Associativity::Left,
Self::Neq => Associativity::Left,
Self::Lt => Associativity::Left,
Self::Leq => Associativity::Left,
Self::Gt => Associativity::Left,
Self::Geq => Associativity::Left,
Self::Assign => Associativity::Right,
Self::AddAssign => Associativity::Right,
Self::SubAssign => Associativity::Right,
Self::MulAssign => Associativity::Right,
Self::DivAssign => Associativity::Right,
}
}

231
src/syntax/highlight.rs Normal file
View File

@ -0,0 +1,231 @@
use std::ops::Range;
use super::{NodeKind, RedRef};
/// Walk the descendants of `node` and report a highlighting category for
/// each one that has a category and whose span overlaps or touches `range`.
///
/// The callback `f` receives the span of the descendant as a byte range
/// together with its category. A child is reported before its own children
/// are visited, and subtrees that lie entirely outside of `range` are
/// skipped.
pub fn highlight<F>(node: RedRef, range: Range<usize>, f: &mut F)
where
    F: FnMut(Range<usize>, Category),
{
    for child in node.children() {
        let span = child.span();

        // Nothing inside a child that ends before the range starts, or
        // starts after the range ends, can be relevant.
        let outside = span.end < range.start || span.start > range.end;
        if outside {
            continue;
        }

        if let Some(category) = Category::determine(child, node) {
            f(span.to_range(), category);
        }

        highlight(child, range.clone(), f);
    }
}
/// The syntax highlighting category of a node.
///
/// Categories are assigned by [`Category::determine`], which inspects the
/// kind of a node and the kind of its parent.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
pub enum Category {
/// Any kind of bracket, parenthesis or brace (opening or closing).
Bracket,
/// Punctuation in code: commas, semicolons and colons.
Punctuation,
/// A line or block comment.
Comment,
/// Strong text.
Strong,
/// Emphasized text.
Emph,
/// Raw text or code.
Raw,
/// A math formula.
Math,
/// A section heading.
Heading,
/// A list or enumeration marker: a minus directly inside a list node or an
/// enum numbering.
List,
/// An easily typable shortcut to a unicode codepoint: linebreak,
/// non-breaking space, en dash or em dash.
Shortcut,
/// An escape sequence.
Escape,
/// A keyword. This includes the word operators `not`, `and` and `or`.
Keyword,
/// An operator symbol.
Operator,
/// The none literal.
None,
/// The auto literal.
Auto,
/// A boolean literal.
Bool,
/// A numeric literal: integer, float, length, angle, percentage or
/// fraction.
Number,
/// A string literal.
String,
/// A function: an identifier directly inside a call or with expression, or
/// one that starts at the same position as its enclosing closure node.
Function,
/// A variable: any other identifier, except for one directly inside a
/// named node, which gets no category.
Variable,
/// An invalid node: an error or an unknown node.
Invalid,
}
impl Category {
/// Determine the highlighting category of a node given its parent.
pub fn determine(child: RedRef, parent: RedRef) -> Option<Category> {
match child.kind() {
NodeKind::LeftBracket => Some(Category::Bracket),
NodeKind::RightBracket => Some(Category::Bracket),
NodeKind::LeftBrace => Some(Category::Bracket),
NodeKind::RightBrace => Some(Category::Bracket),
NodeKind::LeftParen => Some(Category::Bracket),
NodeKind::RightParen => Some(Category::Bracket),
NodeKind::Comma => Some(Category::Punctuation),
NodeKind::Semicolon => Some(Category::Punctuation),
NodeKind::Colon => Some(Category::Punctuation),
NodeKind::LineComment => Some(Category::Comment),
NodeKind::BlockComment => Some(Category::Comment),
NodeKind::Strong => Some(Category::Strong),
NodeKind::Emph => Some(Category::Emph),
NodeKind::Raw(_) => Some(Category::Raw),
NodeKind::Math(_) => Some(Category::Math),
NodeKind::Heading => Some(Category::Heading),
NodeKind::Minus => match parent.kind() {
NodeKind::List => Some(Category::List),
_ => Some(Category::Operator),
},
NodeKind::EnumNumbering(_) => Some(Category::List),
NodeKind::Linebreak => Some(Category::Shortcut),
NodeKind::NonBreakingSpace => Some(Category::Shortcut),
NodeKind::EnDash => Some(Category::Shortcut),
NodeKind::EmDash => Some(Category::Shortcut),
NodeKind::Escape(_) => Some(Category::Escape),
NodeKind::Let => Some(Category::Keyword),
NodeKind::If => Some(Category::Keyword),
NodeKind::Else => Some(Category::Keyword),
NodeKind::For => Some(Category::Keyword),
NodeKind::In => Some(Category::Keyword),
NodeKind::While => Some(Category::Keyword),
NodeKind::Break => Some(Category::Keyword),
NodeKind::Continue => Some(Category::Keyword),
NodeKind::Return => Some(Category::Keyword),
NodeKind::Import => Some(Category::Keyword),
NodeKind::Include => Some(Category::Keyword),
NodeKind::From => Some(Category::Keyword),
NodeKind::Not => Some(Category::Keyword),
NodeKind::And => Some(Category::Keyword),
NodeKind::Or => Some(Category::Keyword),
NodeKind::With => Some(Category::Keyword),
NodeKind::Plus => Some(Category::Operator),
NodeKind::Star => Some(Category::Operator),
NodeKind::Slash => Some(Category::Operator),
NodeKind::PlusEq => Some(Category::Operator),
NodeKind::HyphEq => Some(Category::Operator),
NodeKind::StarEq => Some(Category::Operator),
NodeKind::SlashEq => Some(Category::Operator),
NodeKind::Eq => match parent.kind() {
NodeKind::Heading => None,
_ => Some(Category::Operator),
},
NodeKind::EqEq => Some(Category::Operator),
NodeKind::ExclEq => Some(Category::Operator),
NodeKind::Lt => Some(Category::Operator),
NodeKind::LtEq => Some(Category::Operator),
NodeKind::Gt => Some(Category::Operator),
NodeKind::GtEq => Some(Category::Operator),
NodeKind::Dots => Some(Category::Operator),
NodeKind::Arrow => Some(Category::Operator),
NodeKind::None => Some(Category::None),
NodeKind::Auto => Some(Category::Auto),
NodeKind::Ident(_) => match parent.kind() {
NodeKind::Named => None,
NodeKind::Closure if child.span().start == parent.span().start => {
Some(Category::Function)
}
NodeKind::WithExpr => Some(Category::Function),
NodeKind::Call => Some(Category::Function),
_ => Some(Category::Variable),
},
NodeKind::Bool(_) => Some(Category::Bool),
NodeKind::Int(_) => Some(Category::Number),
NodeKind::Float(_) => Some(Category::Number),
NodeKind::Length(_, _) => Some(Category::Number),
NodeKind::Angle(_, _) => Some(Category::Number),
NodeKind::Percentage(_) => Some(Category::Number),
NodeKind::Fraction(_) => Some(Category::Number),
NodeKind::Str(_) => Some(Category::String),
NodeKind::Error(_, _) => Some(Category::Invalid),
NodeKind::Unknown(_) => Some(Category::Invalid),
NodeKind::Markup => None,
NodeKind::Space(_) => None,
NodeKind::Parbreak => None,
NodeKind::Text(_) => None,
NodeKind::List => None,
NodeKind::Enum => None,
NodeKind::Array => None,
NodeKind::Dict => None,
NodeKind::Named => None,
NodeKind::Group => None,
NodeKind::Unary => None,
NodeKind::Binary => None,
NodeKind::Call => None,
NodeKind::CallArgs => None,
NodeKind::Closure => None,
NodeKind::ClosureParams => None,
NodeKind::Spread => None,
NodeKind::Template => None,
NodeKind::Block => None,
NodeKind::ForExpr => None,
NodeKind::WhileExpr => None,
NodeKind::IfExpr => None,
NodeKind::LetExpr => None,
NodeKind::WithExpr => None,
NodeKind::ForPattern => None,
NodeKind::ImportExpr => None,
NodeKind::ImportItems => None,
NodeKind::IncludeExpr => None,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::source::SourceFile;

    #[test]
    fn test_highlighting() {
        use Category::*;

        /// Highlight all of `src` and compare the reported
        /// `(range, category)` pairs, in the order they were reported,
        /// against `goal`.
        #[track_caller]
        fn test(src: &str, goal: &[(Range<usize>, Category)]) {
            let source = SourceFile::detached(src);
            let mut found = Vec::new();
            source.highlight(0 .. src.len(), |range, category| {
                found.push((range, category));
            });
            assert_eq!(found, goal);
        }

        // The heading is reported as a whole, followed by the strong
        // toggles nested inside of it.
        test("= *AB*", &[
            (0 .. 6, Heading),
            (2 .. 3, Strong),
            (5 .. 6, Strong),
        ]);

        // The callee of a call is a function, its argument a variable.
        test("#f(x + 1)", &[
            (0 .. 2, Function),
            (2 .. 3, Bracket),
            (3 .. 4, Variable),
            (5 .. 6, Operator),
            (7 .. 8, Number),
            (8 .. 9, Bracket),
        ]);

        // The name bound by a let with parameters is a function as well.
        test("#let f(x) = x", &[
            (0 .. 4, Keyword),
            (5 .. 6, Function),
            (6 .. 7, Bracket),
            (7 .. 8, Variable),
            (8 .. 9, Bracket),
            (10 .. 11, Operator),
            (12 .. 13, Variable),
        ]);
    }
}

View File

@ -1,12 +1,14 @@
//! Syntax types.
pub mod ast;
mod highlight;
mod pretty;
mod span;
use std::fmt::{self, Debug, Display, Formatter};
use std::rc::Rc;
pub use highlight::*;
pub use pretty::*;
pub use span::*;
@ -503,7 +505,7 @@ pub enum NodeKind {
EmDash,
/// An escape sequence: a backslash followed by either an escapable symbol
/// (`\*`) or the letter "u" and a hexadecimal unicode entity enclosed in
/// curly braces (`\u{1F5FA}`).
UnicodeEscape(char),
Escape(char),
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
@ -689,7 +691,7 @@ impl NodeKind {
Self::NonBreakingSpace => "non-breaking space",
Self::EnDash => "en dash",
Self::EmDash => "em dash",
Self::UnicodeEscape(_) => "unicode escape sequence",
Self::Escape(_) => "escape sequence",
Self::Strong => "strong",
Self::Emph => "emphasis",
Self::Heading => "heading",