Reorganize syntax types into two modules 📦

This commit is contained in:
Laurenz 2020-09-30 12:38:02 +02:00
parent fee5170a68
commit bc1b4216a8
17 changed files with 243 additions and 245 deletions

View File

@ -4,7 +4,7 @@ use std::collections::BTreeMap;
use std::fmt::{self, Debug, Display, Formatter};
use std::ops::Index;
use crate::syntax::span::{Span, Spanned};
use crate::syntax::{Span, Spanned};
/// A table data structure, which maps from integers (`u64`) or strings to a
/// generic value type.

View File

@ -11,8 +11,7 @@ use crate::color::RgbaColor;
use crate::layout::{Command, Commands, Dir, LayoutContext, SpecAlign};
use crate::length::{Length, ScaleLength};
use crate::paper::Paper;
use crate::syntax::span::{Span, Spanned};
use crate::syntax::tree::{Ident, SyntaxNode, SyntaxTree};
use crate::syntax::{Ident, Span, Spanned, SyntaxNode, SyntaxTree};
use crate::{DynFuture, Feedback, Pass};
/// A computational value.

View File

@ -7,7 +7,7 @@
#[cfg(feature = "serialize")]
use serde::Serialize;
use crate::syntax::span::SpanVec;
use crate::syntax::SpanVec;
/// A list of spanned diagnostics.
pub type Diagnostics = SpanVec<Diagnostic>;
@ -42,7 +42,7 @@ impl Diagnostic {
///
/// ```
/// # use typstc::error;
/// # use typstc::syntax::span::Span;
/// # use typstc::syntax::Span;
/// # use typstc::Feedback;
/// # let span = Span::ZERO;
/// # let mut feedback = Feedback::new();
@ -87,7 +87,7 @@ macro_rules! __impl_diagnostic {
};
($level:expr; $span:expr, $fmt:literal $($tts:tt)*) => {
$crate::syntax::span::Spanned::new(
$crate::syntax::Spanned::new(
$crate::__impl_diagnostic!($level; $fmt $($tts)*),
$span,
)

View File

@ -25,7 +25,7 @@ use crate::compute::scope::Scope;
use crate::font::SharedFontLoader;
use crate::geom::{Margins, Size};
use crate::style::{LayoutStyle, PageStyle, TextStyle};
use crate::syntax::tree::SyntaxTree;
use crate::syntax::SyntaxTree;
use elements::LayoutElements;
use prelude::*;

View File

@ -4,9 +4,9 @@ use super::line::{LineContext, LineLayouter};
use super::text::{layout_text, TextContext};
use super::*;
use crate::style::LayoutStyle;
use crate::syntax::decoration::Decoration;
use crate::syntax::span::{Span, Spanned};
use crate::syntax::tree::{CallExpr, Code, Heading, SyntaxNode, SyntaxTree};
use crate::syntax::{
CallExpr, Code, Decoration, Heading, Span, Spanned, SyntaxNode, SyntaxTree,
};
use crate::{DynFuture, Feedback, Pass};
/// Layout a syntax tree into a collection of boxes.

View File

@ -2,9 +2,9 @@
//!
//! # Steps
//! - **Parsing:** The parsing step first transforms a plain string into an
//! [iterator of tokens][tokens]. Then, a parser constructs a syntax tree from
//! the token stream. The structures describing the tree can be found in the
//! [syntax] module.
//! [iterator of tokens][tokens]. Then, a [parser] constructs a syntax tree
//! from the token stream. The structures describing the tree can be found in
//! the [syntax] module.
//! - **Layouting:** The next step is to transform the syntax tree into a
//! portable representation of the typesetted document. Types for these can be
//! found in the [layout] module. A finished layout ready for exporting is a
@ -13,7 +13,8 @@
//! format. Submodules for these formats are located in the [export] module.
//! Currently, the only supported output format is [_PDF_].
//!
//! [tokens]: syntax/tokens/struct.Tokens.html
//! [tokens]: parse/struct.Tokens.html
//! [parser]: parse/fn.parse.html
//! [syntax]: syntax/index.html
//! [layout]: layout/index.html
//! [export]: export/index.html
@ -34,6 +35,7 @@ pub mod layout;
pub mod length;
pub mod library;
pub mod paper;
pub mod parse;
pub mod prelude;
pub mod style;
pub mod syntax;
@ -48,10 +50,7 @@ use crate::diagnostic::Diagnostics;
use crate::font::SharedFontLoader;
use crate::layout::{Commands, MultiLayout};
use crate::style::{LayoutStyle, PageStyle, TextStyle};
use crate::syntax::decoration::Decorations;
use crate::syntax::parsing::parse;
use crate::syntax::span::{Offset, Pos};
use crate::syntax::tree::SyntaxTree;
use crate::syntax::{Decorations, Offset, Pos, SyntaxTree};
/// Transforms source code into typesetted layouts.
///
@ -87,7 +86,7 @@ impl Typesetter {
/// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
parse(src)
parse::parse(src)
}
/// Layout a syntax tree and return the produced layout.

View File

@ -1,4 +1,4 @@
use crate::syntax::tokens::is_newline_char;
use super::is_newline_char;
/// Resolves all escape sequences in a string.
pub fn unescape_string(string: &str) -> String {

11
src/parse/mod.rs Normal file
View File

@ -0,0 +1,11 @@
//! Parsing and tokenization.
mod escaping;
mod parser;
mod tokenizer;
pub use parser::*;
pub use tokenizer::*;
#[cfg(test)]
mod tests;

View File

@ -1,12 +1,10 @@
use std::str::FromStr;
use super::escaping::*;
use super::*;
use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry;
use crate::syntax::decoration::Decoration;
use crate::syntax::span::{Pos, Span, Spanned};
use crate::syntax::tokens::{Token, TokenMode, Tokens};
use crate::syntax::tree::*;
use crate::syntax::*;
use crate::{Feedback, Pass};
/// Parse a string of source code.

View File

@ -1,16 +1,16 @@
#![allow(non_snake_case)]
use std::fmt::Debug;
use super::parse;
use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry;
use crate::length::Length;
use crate::syntax::decoration::Decoration::*;
use crate::syntax::span::Spanned;
use crate::syntax::tests::*;
use crate::syntax::tree::*;
use crate::syntax::*;
// ------------------------------ Construct Syntax Nodes ------------------------------ //
use Decoration::*;
use SyntaxNode::{
Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I,
};
@ -161,6 +161,42 @@ macro_rules! d {
};
}
/// Compare the expected value with the found one and fail the test if they
/// differ. Before panicking, the source of the test case and both values are
/// printed.
///
/// When `cmp_spans` is false, spans are ignored.
pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
where
    T: Debug + PartialEq,
{
    // Span comparison is switched to the requested mode only for this one
    // comparison and turned back on right afterwards.
    Span::set_cmp(cmp_spans);
    let same = exp == found;
    Span::set_cmp(true);

    if same {
        return;
    }

    println!("source: {:?}", src);
    match cmp_spans {
        // Pretty-printed output when spans take part in the comparison.
        true => {
            println!("expected: {:#?}", exp);
            println!("found: {:#?}", found);
        }
        // Compact output when spans are ignored.
        false => {
            println!("expected: {:?}", exp);
            println!("found: {:?}", found);
        }
    }
    panic!("test failed");
}
/// Shorthand for tests: wrap `v` in a `Spanned` whose span runs from
/// `Pos::new(sl, sc)` to `Pos::new(el, ec)`.
pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
    let start = Pos::new(sl, sc);
    let end = Pos::new(el, ec);
    let span = Span::new(start, end);
    Spanned::new(v, span)
}
// Enables tests to optionally specify spans.
impl<T> From<T> for Spanned<T> {
fn from(t: T) -> Self {
Spanned::zero(t)
}
}
// --------------------------------------- Tests -------------------------------------- //
#[test]

View File

@ -4,158 +4,11 @@ use std::iter::Peekable;
use std::str::Chars;
use unicode_xid::UnicodeXID;
use super::span::{Pos, Span, Spanned};
use crate::length::Length;
use crate::syntax::{Pos, Span, Spanned, Token};
use Token::*;
use TokenMode::*;
/// A minimal semantic entity of source code.
///
/// Variants that carry text hold it as a `&'s str` slice instead of an owned
/// `String` (presumably slices of the tokenized source — confirm against the
/// tokenizer).
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Space(usize),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A double forward chevron in a function header: `>>`.
Chain,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
Ident(&'s str),
/// A quoted string in a function header: `"..."`.
Str {
/// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences, these are not yet
/// applied, so that a string slice can be stored here instead of a
/// `String`. The escaping is done later in the parser.
string: &'s str,
/// Whether the closing quote was present.
terminated: bool,
},
/// A boolean in a function header: `true | false`.
Bool(bool),
/// A number in a function header: `3.14`.
Number(f64),
/// A length in a function header: `12pt`.
Length(Length),
/// A hex value in a function header: `#20d82a`.
Hex(&'s str),
/// A plus in a function header, signifying the addition of expressions.
Plus,
/// A hyphen in a function header, signifying the subtraction of
/// expressions.
Hyphen,
/// A slash in a function header, signifying the division of expressions.
Slash,
/// A star. It can appear in a function header, where it signifies the
/// multiplication of expressions, or in the body, where it modifies the
/// styling.
Star,
/// An underscore in body-text.
Underscore,
/// A backslash followed by whitespace in text.
Backslash,
/// A hashtag. In the body, it can indicate compute mode or headings.
Hashtag,
/// A unicode escape sequence.
UnicodeEscape {
/// The escape sequence between two braces.
sequence: &'s str,
/// Whether the closing brace was present.
terminated: bool,
},
/// Raw text.
Raw {
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backtick was present.
terminated: bool,
},
/// Multi-line code block.
Code {
/// The language of the code block, if specified.
lang: Option<Spanned<&'s str>>,
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backticks were present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
/// Things that are not valid in the context they appeared in.
Invalid(&'s str),
}
impl<'s> Token<'s> {
/// The natural-language name for this token for use in error messages.
///
/// The name depends only on the variant; the one exception is
/// `Invalid("*/")`, which gets its own name.
pub fn name(self) -> &'static str {
match self {
Space(_) => "space",
LineComment(_) => "line comment",
BlockComment(_) => "block comment",
LeftBracket => "opening bracket",
RightBracket => "closing bracket",
LeftParen => "opening paren",
RightParen => "closing paren",
LeftBrace => "opening brace",
RightBrace => "closing brace",
Chain => "function chain operator",
Colon => "colon",
Comma => "comma",
Equals => "equals sign",
Ident(_) => "identifier",
Str { .. } => "string",
Bool(_) => "bool",
Number(_) => "number",
Length(_) => "length",
Hex(_) => "hex value",
Plus => "plus",
// The variant is called `Hyphen`, but it is reported as "minus".
Hyphen => "minus",
Slash => "slash",
Star => "star",
Underscore => "underscore",
Backslash => "backslash",
Hashtag => "hashtag",
UnicodeEscape { .. } => "unicode escape sequence",
Raw { .. } => "raw text",
Code { .. } => "code block",
Text(_) => "text",
// This arm must stay before the catch-all `Invalid(_)` arm below,
// otherwise it would never match.
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
}
}
}
/// An iterator over the tokens of a string of source code.
#[derive(Debug)]
@ -602,10 +455,10 @@ pub fn is_identifier(string: &str) -> bool {
#[cfg(test)]
#[allow(non_snake_case)]
mod tests {
use super::super::span::Spanned;
use super::*;
use crate::length::Length;
use crate::syntax::tests::*;
use crate::parse::tests::{check, s};
use Token::{
BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,

View File

@ -6,7 +6,5 @@ pub use crate::layout::prelude::*;
pub use crate::layout::Command::{self, *};
pub use crate::layout::Commands;
pub use crate::style::*;
pub use crate::syntax::parsing::parse;
pub use crate::syntax::span::{Pos, Span, SpanVec, Spanned};
pub use crate::syntax::tree::*;
pub use crate::syntax::*;
pub use crate::{Feedback, Pass};

View File

@ -1,50 +1,11 @@
//! Syntax trees, parsing and tokenization.
//! Syntax types.
pub mod decoration;
pub mod parsing;
pub mod span;
pub mod tokens;
pub mod tree;
mod decoration;
mod span;
mod token;
mod tree;
// Shared helpers for the crate's tests.
#[cfg(test)]
mod tests {
use super::span;
use crate::prelude::*;
use std::fmt::Debug;
/// Assert that expected and found are equal. If they aren't, print the
/// source of the test case and both values, then panic.
///
/// When `cmp_spans` is false, spans are ignored.
pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
where
T: Debug + PartialEq,
{
// Switch span comparison to the requested mode for this comparison only,
// then turn it back on.
span::set_cmp(cmp_spans);
let equal = exp == found;
span::set_cmp(true);
if !equal {
println!("source: {:?}", src);
// Pretty-print when spans are compared, compact output otherwise.
if cmp_spans {
println!("expected: {:#?}", exp);
println!("found: {:#?}", found);
} else {
println!("expected: {:?}", exp);
println!("found: {:?}", found);
}
panic!("test failed");
}
}
/// Wrap `v` in a `Spanned` whose span runs from `Pos::new(sl, sc)` to
/// `Pos::new(el, ec)`.
pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
Spanned::new(v, Span::new(Pos::new(sl, sc), Pos::new(el, ec)))
}
// Enables tests to optionally specify spans: a bare value is converted with
// `Spanned::zero`.
impl<T> From<T> for Spanned<T> {
fn from(t: T) -> Self {
Spanned::zero(t)
}
}
}
pub use decoration::*;
pub use span::*;
pub use token::*;
pub use tree::*;

View File

@ -1,9 +0,0 @@
//! Parsing of source code into syntax trees.
mod escaping;
mod parser;
pub use parser::parse;
#[cfg(test)]
mod tests;

View File

@ -14,12 +14,6 @@ thread_local! {
static CMP_SPANS: Cell<bool> = Cell::new(true);
}
/// When set to `false`, comparisons with `PartialEq` ignore spans.
///
/// The flag is stored in the thread-local `CMP_SPANS` cell. This function is
/// only compiled for tests.
#[cfg(test)]
pub(crate) fn set_cmp(cmp: bool) {
CMP_SPANS.with(|cell| cell.set(cmp));
}
/// Span offsetting.
pub trait Offset {
/// Offset all spans contained in `Self` by the given position.
@ -132,6 +126,12 @@ impl Span {
pub fn expand(&mut self, other: Self) {
*self = Self::merge(*self, other)
}
/// Choose whether comparisons with `PartialEq` take spans into account.
/// Passing `false` makes them ignore spans.
///
/// The choice is written to the thread-local `CMP_SPANS` cell.
#[cfg(test)]
pub(crate) fn set_cmp(cmp: bool) {
    CMP_SPANS.with(|flag| {
        flag.set(cmp);
    });
}
}
impl Offset for Span {

152
src/syntax/token.rs Normal file
View File

@ -0,0 +1,152 @@
//! Tokenization.
use super::span::Spanned;
use crate::length::Length;
/// A minimal semantic entity of source code.
///
/// Variants that carry text hold it as a `&'s str` slice instead of an owned
/// `String` (presumably slices of the tokenized source — confirm against the
/// tokenizer).
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Space(usize),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A double forward chevron in a function header: `>>`.
Chain,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
Ident(&'s str),
/// A quoted string in a function header: `"..."`.
Str {
/// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences, these are not yet
/// applied, so that a string slice can be stored here instead of a
/// `String`. The escaping is done later in the parser.
string: &'s str,
/// Whether the closing quote was present.
terminated: bool,
},
/// A boolean in a function header: `true | false`.
Bool(bool),
/// A number in a function header: `3.14`.
Number(f64),
/// A length in a function header: `12pt`.
Length(Length),
/// A hex value in a function header: `#20d82a`.
Hex(&'s str),
/// A plus in a function header, signifying the addition of expressions.
Plus,
/// A hyphen in a function header, signifying the subtraction of
/// expressions.
Hyphen,
/// A slash in a function header, signifying the division of expressions.
Slash,
/// A star. It can appear in a function header, where it signifies the
/// multiplication of expressions, or in the body, where it modifies the
/// styling.
Star,
/// An underscore in body-text.
Underscore,
/// A backslash followed by whitespace in text.
Backslash,
/// A hashtag. In the body, it can indicate compute mode or headings.
Hashtag,
/// A unicode escape sequence.
UnicodeEscape {
/// The escape sequence between two braces.
sequence: &'s str,
/// Whether the closing brace was present.
terminated: bool,
},
/// Raw text.
Raw {
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backtick was present.
terminated: bool,
},
/// Multi-line code block.
Code {
/// The language of the code block, if specified.
lang: Option<Spanned<&'s str>>,
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backticks were present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
/// Things that are not valid in the context they appeared in.
Invalid(&'s str),
}
impl<'s> Token<'s> {
/// The natural-language name for this token for use in error messages.
pub fn name(self) -> &'static str {
match self {
Self::Space(_) => "space",
Self::LineComment(_) => "line comment",
Self::BlockComment(_) => "block comment",
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::Chain => "function chain operator",
Self::Colon => "colon",
Self::Comma => "comma",
Self::Equals => "equals sign",
Self::Ident(_) => "identifier",
Self::Str { .. } => "string",
Self::Bool(_) => "bool",
Self::Number(_) => "number",
Self::Length(_) => "length",
Self::Hex(_) => "hex value",
Self::Plus => "plus",
Self::Hyphen => "minus",
Self::Slash => "slash",
Self::Star => "star",
Self::Underscore => "underscore",
Self::Backslash => "backslash",
Self::Hashtag => "hashtag",
Self::UnicodeEscape { .. } => "unicode escape sequence",
Self::Raw { .. } => "raw text",
Self::Code { .. } => "code block",
Self::Text(_) => "text",
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
}
}
}

View File

@ -4,12 +4,12 @@ use std::fmt::{self, Debug, Formatter};
use super::decoration::Decoration;
use super::span::{SpanVec, Spanned};
use super::tokens::is_identifier;
use crate::color::RgbaColor;
use crate::compute::table::{SpannedEntry, Table};
use crate::compute::value::{TableValue, Value};
use crate::layout::LayoutContext;
use crate::length::Length;
use crate::parse::is_identifier;
use crate::{DynFuture, Feedback};
/// A collection of nodes which form a tree together with the nodes' children.