Reorganize syntax types into two modules 📦

This commit is contained in:
Laurenz 2020-09-30 12:38:02 +02:00
parent fee5170a68
commit bc1b4216a8
17 changed files with 243 additions and 245 deletions

View File

@ -4,7 +4,7 @@ use std::collections::BTreeMap;
use std::fmt::{self, Debug, Display, Formatter}; use std::fmt::{self, Debug, Display, Formatter};
use std::ops::Index; use std::ops::Index;
use crate::syntax::span::{Span, Spanned}; use crate::syntax::{Span, Spanned};
/// A table data structure, which maps from integers (`u64`) or strings to a /// A table data structure, which maps from integers (`u64`) or strings to a
/// generic value type. /// generic value type.

View File

@ -11,8 +11,7 @@ use crate::color::RgbaColor;
use crate::layout::{Command, Commands, Dir, LayoutContext, SpecAlign}; use crate::layout::{Command, Commands, Dir, LayoutContext, SpecAlign};
use crate::length::{Length, ScaleLength}; use crate::length::{Length, ScaleLength};
use crate::paper::Paper; use crate::paper::Paper;
use crate::syntax::span::{Span, Spanned}; use crate::syntax::{Ident, Span, Spanned, SyntaxNode, SyntaxTree};
use crate::syntax::tree::{Ident, SyntaxNode, SyntaxTree};
use crate::{DynFuture, Feedback, Pass}; use crate::{DynFuture, Feedback, Pass};
/// A computational value. /// A computational value.

View File

@ -7,7 +7,7 @@
#[cfg(feature = "serialize")] #[cfg(feature = "serialize")]
use serde::Serialize; use serde::Serialize;
use crate::syntax::span::SpanVec; use crate::syntax::SpanVec;
/// A list of spanned diagnostics. /// A list of spanned diagnostics.
pub type Diagnostics = SpanVec<Diagnostic>; pub type Diagnostics = SpanVec<Diagnostic>;
@ -42,7 +42,7 @@ impl Diagnostic {
/// ///
/// ``` /// ```
/// # use typstc::error; /// # use typstc::error;
/// # use typstc::syntax::span::Span; /// # use typstc::syntax::Span;
/// # use typstc::Feedback; /// # use typstc::Feedback;
/// # let span = Span::ZERO; /// # let span = Span::ZERO;
/// # let mut feedback = Feedback::new(); /// # let mut feedback = Feedback::new();
@ -87,7 +87,7 @@ macro_rules! __impl_diagnostic {
}; };
($level:expr; $span:expr, $fmt:literal $($tts:tt)*) => { ($level:expr; $span:expr, $fmt:literal $($tts:tt)*) => {
$crate::syntax::span::Spanned::new( $crate::syntax::Spanned::new(
$crate::__impl_diagnostic!($level; $fmt $($tts)*), $crate::__impl_diagnostic!($level; $fmt $($tts)*),
$span, $span,
) )

View File

@ -25,7 +25,7 @@ use crate::compute::scope::Scope;
use crate::font::SharedFontLoader; use crate::font::SharedFontLoader;
use crate::geom::{Margins, Size}; use crate::geom::{Margins, Size};
use crate::style::{LayoutStyle, PageStyle, TextStyle}; use crate::style::{LayoutStyle, PageStyle, TextStyle};
use crate::syntax::tree::SyntaxTree; use crate::syntax::SyntaxTree;
use elements::LayoutElements; use elements::LayoutElements;
use prelude::*; use prelude::*;

View File

@ -4,9 +4,9 @@ use super::line::{LineContext, LineLayouter};
use super::text::{layout_text, TextContext}; use super::text::{layout_text, TextContext};
use super::*; use super::*;
use crate::style::LayoutStyle; use crate::style::LayoutStyle;
use crate::syntax::decoration::Decoration; use crate::syntax::{
use crate::syntax::span::{Span, Spanned}; CallExpr, Code, Decoration, Heading, Span, Spanned, SyntaxNode, SyntaxTree,
use crate::syntax::tree::{CallExpr, Code, Heading, SyntaxNode, SyntaxTree}; };
use crate::{DynFuture, Feedback, Pass}; use crate::{DynFuture, Feedback, Pass};
/// Layout a syntax tree into a collection of boxes. /// Layout a syntax tree into a collection of boxes.

View File

@ -2,9 +2,9 @@
//! //!
//! # Steps //! # Steps
//! - **Parsing:** The parsing step first transforms a plain string into an //! - **Parsing:** The parsing step first transforms a plain string into an
//! [iterator of tokens][tokens]. Then, a parser constructs a syntax tree from //! [iterator of tokens][tokens]. Then, a [parser] constructs a syntax tree
//! the token stream. The structures describing the tree can be found in the //! from the token stream. The structures describing the tree can be found in
//! [syntax] module. //! the [syntax] module.
//! - **Layouting:** The next step is to transform the syntax tree into a //! - **Layouting:** The next step is to transform the syntax tree into a
//! portable representation of the typesetted document. Types for these can be //! portable representation of the typesetted document. Types for these can be
//! found in the [layout] module. A finished layout ready for exporting is a //! found in the [layout] module. A finished layout ready for exporting is a
@ -13,7 +13,8 @@
//! format. Submodules for these formats are located in the [export] module. //! format. Submodules for these formats are located in the [export] module.
//! Currently, the only supported output format is [_PDF_]. //! Currently, the only supported output format is [_PDF_].
//! //!
//! [tokens]: syntax/tokens/struct.Tokens.html //! [tokens]: parse/struct.Tokens.html
//! [parser]: parse/fn.parse.html
//! [syntax]: syntax/index.html //! [syntax]: syntax/index.html
//! [layout]: layout/index.html //! [layout]: layout/index.html
//! [export]: export/index.html //! [export]: export/index.html
@ -34,6 +35,7 @@ pub mod layout;
pub mod length; pub mod length;
pub mod library; pub mod library;
pub mod paper; pub mod paper;
pub mod parse;
pub mod prelude; pub mod prelude;
pub mod style; pub mod style;
pub mod syntax; pub mod syntax;
@ -48,10 +50,7 @@ use crate::diagnostic::Diagnostics;
use crate::font::SharedFontLoader; use crate::font::SharedFontLoader;
use crate::layout::{Commands, MultiLayout}; use crate::layout::{Commands, MultiLayout};
use crate::style::{LayoutStyle, PageStyle, TextStyle}; use crate::style::{LayoutStyle, PageStyle, TextStyle};
use crate::syntax::decoration::Decorations; use crate::syntax::{Decorations, Offset, Pos, SyntaxTree};
use crate::syntax::parsing::parse;
use crate::syntax::span::{Offset, Pos};
use crate::syntax::tree::SyntaxTree;
/// Transforms source code into typesetted layouts. /// Transforms source code into typesetted layouts.
/// ///
@ -87,7 +86,7 @@ impl Typesetter {
/// Parse source code into a syntax tree. /// Parse source code into a syntax tree.
pub fn parse(&self, src: &str) -> Pass<SyntaxTree> { pub fn parse(&self, src: &str) -> Pass<SyntaxTree> {
parse(src) parse::parse(src)
} }
/// Layout a syntax tree and return the produced layout. /// Layout a syntax tree and return the produced layout.

View File

@ -1,4 +1,4 @@
use crate::syntax::tokens::is_newline_char; use super::is_newline_char;
/// Resolves all escape sequences in a string. /// Resolves all escape sequences in a string.
pub fn unescape_string(string: &str) -> String { pub fn unescape_string(string: &str) -> String {

11
src/parse/mod.rs Normal file
View File

@ -0,0 +1,11 @@
//! Parsing and tokenization.
mod escaping;
mod parser;
mod tokenizer;
pub use parser::*;
pub use tokenizer::*;
#[cfg(test)]
mod tests;

View File

@ -1,12 +1,10 @@
use std::str::FromStr; use std::str::FromStr;
use super::escaping::*; use super::escaping::*;
use super::*;
use crate::color::RgbaColor; use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry; use crate::compute::table::SpannedEntry;
use crate::syntax::decoration::Decoration; use crate::syntax::*;
use crate::syntax::span::{Pos, Span, Spanned};
use crate::syntax::tokens::{Token, TokenMode, Tokens};
use crate::syntax::tree::*;
use crate::{Feedback, Pass}; use crate::{Feedback, Pass};
/// Parse a string of source code. /// Parse a string of source code.

View File

@ -1,16 +1,16 @@
#![allow(non_snake_case)] #![allow(non_snake_case)]
use std::fmt::Debug;
use super::parse; use super::parse;
use crate::color::RgbaColor; use crate::color::RgbaColor;
use crate::compute::table::SpannedEntry; use crate::compute::table::SpannedEntry;
use crate::length::Length; use crate::length::Length;
use crate::syntax::decoration::Decoration::*; use crate::syntax::*;
use crate::syntax::span::Spanned;
use crate::syntax::tests::*;
use crate::syntax::tree::*;
// ------------------------------ Construct Syntax Nodes ------------------------------ // // ------------------------------ Construct Syntax Nodes ------------------------------ //
use Decoration::*;
use SyntaxNode::{ use SyntaxNode::{
Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I, Linebreak as L, Parbreak as P, Spacing as S, ToggleBolder as B, ToggleItalic as I,
}; };
@ -161,6 +161,42 @@ macro_rules! d {
}; };
} }
/// Compare an expected value against the one that was found and fail the
/// test if they differ.
///
/// On a mismatch, the source of the test case and both values are printed
/// before panicking with `"test failed"`.
///
/// When `cmp_spans` is false, spans are ignored during the comparison and the
/// values are printed in compact form; otherwise they are pretty-printed.
pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
where
    T: Debug + PartialEq,
{
    // Span comparison is switched only for this single comparison and is
    // turned back on immediately afterwards.
    Span::set_cmp(cmp_spans);
    let matches = exp == found;
    Span::set_cmp(true);

    if matches {
        return;
    }

    println!("source: {:?}", src);
    match cmp_spans {
        true => {
            println!("expected: {:#?}", exp);
            println!("found: {:#?}", found);
        }
        false => {
            println!("expected: {:?}", exp);
            println!("found: {:?}", found);
        }
    }
    panic!("test failed");
}
/// Wrap `v` in a `Spanned` whose span runs from `Pos::new(sl, sc)` to
/// `Pos::new(el, ec)`.
///
/// NOTE(review): the parameter names suggest start/end line and column —
/// confirm against `Pos`.
pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
    let start = Pos::new(sl, sc);
    let end = Pos::new(el, ec);
    let span = Span::new(start, end);
    Spanned::new(v, span)
}
// Lets tests pass a bare value wherever a `Spanned` is expected: the value is
// wrapped via `Spanned::zero`, so specifying a span is optional.
impl<T> From<T> for Spanned<T> {
    fn from(value: T) -> Spanned<T> {
        Spanned::zero(value)
    }
}
// --------------------------------------- Tests -------------------------------------- // // --------------------------------------- Tests -------------------------------------- //
#[test] #[test]

View File

@ -4,158 +4,11 @@ use std::iter::Peekable;
use std::str::Chars; use std::str::Chars;
use unicode_xid::UnicodeXID; use unicode_xid::UnicodeXID;
use super::span::{Pos, Span, Spanned};
use crate::length::Length; use crate::length::Length;
use crate::syntax::{Pos, Span, Spanned, Token};
use Token::*; use Token::*;
use TokenMode::*; use TokenMode::*;
/// A minimal semantic entity of source code.
///
/// Variants that carry text hold `&'s str` slices instead of owned strings,
/// which keeps the token `Copy`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Space(usize),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A double forward chevron in a function header: `>>`.
Chain,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
Ident(&'s str),
/// A quoted string in a function header: `"..."`.
Str {
/// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences, these are not yet
/// applied, so that a string slice can be stored here instead of a
/// `String`. The escaping is done later in the parser.
string: &'s str,
/// Whether the closing quote was present.
terminated: bool,
},
/// A boolean in a function header: `true | false`.
Bool(bool),
/// A number in a function header: `3.14`.
Number(f64),
/// A length in a function header: `12pt`.
Length(Length),
/// A hex value in a function header: `#20d82a`.
Hex(&'s str),
/// A plus in a function header, signifying the addition of expressions.
Plus,
/// A hyphen in a function header, signifying the subtraction of
/// expressions.
Hyphen,
/// A slash in a function header, signifying the division of expressions.
Slash,
/// A star. It can appear in a function header where it signifies the
/// multiplication of expressions or the body where it modifies the styling.
Star,
/// An underscore in body-text.
Underscore,
/// A backslash followed by whitespace in text.
Backslash,
/// A hashtag token in the body can indicate compute mode or headings.
Hashtag,
/// A unicode escape sequence.
UnicodeEscape {
/// The escape sequence between two braces.
sequence: &'s str,
/// Whether the closing brace was present.
terminated: bool,
},
/// Raw text.
Raw {
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backtick was present.
terminated: bool,
},
/// Multi-line code block.
Code {
/// The language of the code block, if specified.
lang: Option<Spanned<&'s str>>,
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backticks were present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
/// Things that are not valid in the context they appeared in.
Invalid(&'s str),
}
impl<'s> Token<'s> {
/// The natural-language name for this token for use in error messages.
pub fn name(self) -> &'static str {
match self {
Space(_) => "space",
LineComment(_) => "line comment",
BlockComment(_) => "block comment",
LeftBracket => "opening bracket",
RightBracket => "closing bracket",
LeftParen => "opening paren",
RightParen => "closing paren",
LeftBrace => "opening brace",
RightBrace => "closing brace",
Chain => "function chain operator",
Colon => "colon",
Comma => "comma",
Equals => "equals sign",
Ident(_) => "identifier",
Str { .. } => "string",
Bool(_) => "bool",
Number(_) => "number",
Length(_) => "length",
Hex(_) => "hex value",
Plus => "plus",
Hyphen => "minus",
Slash => "slash",
Star => "star",
Underscore => "underscore",
Backslash => "backslash",
Hashtag => "hashtag",
UnicodeEscape { .. } => "unicode escape sequence",
Raw { .. } => "raw text",
Code { .. } => "code block",
Text(_) => "text",
Invalid("*/") => "end of block comment",
Invalid(_) => "invalid token",
}
}
}
/// An iterator over the tokens of a string of source code. /// An iterator over the tokens of a string of source code.
#[derive(Debug)] #[derive(Debug)]
@ -602,10 +455,10 @@ pub fn is_identifier(string: &str) -> bool {
#[cfg(test)] #[cfg(test)]
#[allow(non_snake_case)] #[allow(non_snake_case)]
mod tests { mod tests {
use super::super::span::Spanned;
use super::*; use super::*;
use crate::length::Length; use crate::length::Length;
use crate::syntax::tests::*; use crate::parse::tests::{check, s};
use Token::{ use Token::{
BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id, BlockComment as BC, Bool, Chain, Hex, Hyphen as Min, Ident as Id,
LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len, LeftBrace as LB, LeftBracket as L, LeftParen as LP, Length as Len,

View File

@ -6,7 +6,5 @@ pub use crate::layout::prelude::*;
pub use crate::layout::Command::{self, *}; pub use crate::layout::Command::{self, *};
pub use crate::layout::Commands; pub use crate::layout::Commands;
pub use crate::style::*; pub use crate::style::*;
pub use crate::syntax::parsing::parse; pub use crate::syntax::*;
pub use crate::syntax::span::{Pos, Span, SpanVec, Spanned};
pub use crate::syntax::tree::*;
pub use crate::{Feedback, Pass}; pub use crate::{Feedback, Pass};

View File

@ -1,50 +1,11 @@
//! Syntax trees, parsing and tokenization. //! Syntax types.
pub mod decoration; mod decoration;
pub mod parsing; mod span;
pub mod span; mod token;
pub mod tokens; mod tree;
pub mod tree;
#[cfg(test)] pub use decoration::*;
mod tests { pub use span::*;
use super::span; pub use token::*;
use crate::prelude::*; pub use tree::*;
use std::fmt::Debug;
/// Compare an expected value against the one that was found and fail the
/// test if they differ.
///
/// On a mismatch, the source of the test case and both values are printed
/// before panicking with `"test failed"`.
///
/// When `cmp_spans` is false, spans are ignored during the comparison and the
/// values are printed in compact form; otherwise they are pretty-printed.
pub fn check<T>(src: &str, exp: T, found: T, cmp_spans: bool)
where
    T: Debug + PartialEq,
{
    // Span comparison is switched only for this single comparison and is
    // turned back on immediately afterwards.
    span::set_cmp(cmp_spans);
    let matches = exp == found;
    span::set_cmp(true);

    if matches {
        return;
    }

    println!("source: {:?}", src);
    match cmp_spans {
        true => {
            println!("expected: {:#?}", exp);
            println!("found: {:#?}", found);
        }
        false => {
            println!("expected: {:?}", exp);
            println!("found: {:?}", found);
        }
    }
    panic!("test failed");
}
/// Wrap `v` in a `Spanned` whose span runs from `Pos::new(sl, sc)` to
/// `Pos::new(el, ec)`.
///
/// NOTE(review): the parameter names suggest start/end line and column —
/// confirm against `Pos`.
pub fn s<T>(sl: usize, sc: usize, el: usize, ec: usize, v: T) -> Spanned<T> {
    let start = Pos::new(sl, sc);
    let end = Pos::new(el, ec);
    let span = Span::new(start, end);
    Spanned::new(v, span)
}
// Lets tests pass a bare value wherever a `Spanned` is expected: the value is
// wrapped via `Spanned::zero`, so specifying a span is optional.
impl<T> From<T> for Spanned<T> {
    fn from(value: T) -> Spanned<T> {
        Spanned::zero(value)
    }
}
}

View File

@ -1,9 +0,0 @@
//! Parsing of source code into syntax trees.
mod escaping;
mod parser;
pub use parser::parse;
#[cfg(test)]
mod tests;

View File

@ -14,12 +14,6 @@ thread_local! {
static CMP_SPANS: Cell<bool> = Cell::new(true); static CMP_SPANS: Cell<bool> = Cell::new(true);
} }
/// Controls whether `PartialEq` comparisons take spans into account.
///
/// Passing `false` makes comparisons ignore spans; passing `true` turns span
/// comparison back on. The flag is kept in the thread-local `CMP_SPANS`.
#[cfg(test)]
pub(crate) fn set_cmp(cmp: bool) {
    CMP_SPANS.with(|flag| {
        flag.set(cmp);
    });
}
/// Span offsetting. /// Span offsetting.
pub trait Offset { pub trait Offset {
/// Offset all spans contained in `Self` by the given position. /// Offset all spans contained in `Self` by the given position.
@ -132,6 +126,12 @@ impl Span {
pub fn expand(&mut self, other: Self) { pub fn expand(&mut self, other: Self) {
*self = Self::merge(*self, other) *self = Self::merge(*self, other)
} }
/// Controls whether `PartialEq` comparisons take spans into account.
///
/// Passing `false` makes comparisons ignore spans; passing `true` turns span
/// comparison back on. The flag is kept in the thread-local `CMP_SPANS`.
#[cfg(test)]
pub(crate) fn set_cmp(cmp: bool) {
    CMP_SPANS.with(|flag| {
        flag.set(cmp);
    });
}
} }
impl Offset for Span { impl Offset for Span {

152
src/syntax/token.rs Normal file
View File

@ -0,0 +1,152 @@
//! Token definition.
use super::span::Spanned;
use crate::length::Length;
/// A minimal semantic entity of source code.
///
/// Variants that carry text hold `&'s str` slices instead of owned strings,
/// which keeps the token `Copy`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Token<'s> {
/// One or more whitespace characters. The contained `usize` denotes the
/// number of newlines that were contained in the whitespace.
Space(usize),
/// A line comment with inner string contents `//<str>\n`.
LineComment(&'s str),
/// A block comment with inner string contents `/*<str>*/`. The comment
/// can contain nested block comments.
BlockComment(&'s str),
/// A left bracket starting a function invocation or body: `[`.
LeftBracket,
/// A right bracket ending a function invocation or body: `]`.
RightBracket,
/// A left parenthesis in a function header: `(`.
LeftParen,
/// A right parenthesis in a function header: `)`.
RightParen,
/// A left brace in a function header: `{`.
LeftBrace,
/// A right brace in a function header: `}`.
RightBrace,
/// A double forward chevron in a function header: `>>`.
Chain,
/// A colon in a function header: `:`.
Colon,
/// A comma in a function header: `,`.
Comma,
/// An equals sign in a function header: `=`.
Equals,
/// An identifier in a function header: `center`.
Ident(&'s str),
/// A quoted string in a function header: `"..."`.
Str {
/// The string inside the quotes.
///
/// _Note_: If the string contains escape sequences, these are not yet
/// applied, so that a string slice can be stored here instead of a
/// `String`. The escaping is done later in the parser.
string: &'s str,
/// Whether the closing quote was present.
terminated: bool,
},
/// A boolean in a function header: `true | false`.
Bool(bool),
/// A number in a function header: `3.14`.
Number(f64),
/// A length in a function header: `12pt`.
Length(Length),
/// A hex value in a function header: `#20d82a`.
Hex(&'s str),
/// A plus in a function header, signifying the addition of expressions.
Plus,
/// A hyphen in a function header, signifying the subtraction of
/// expressions.
Hyphen,
/// A slash in a function header, signifying the division of expressions.
Slash,
/// A star. It can appear in a function header where it signifies the
/// multiplication of expressions or the body where it modifies the styling.
Star,
/// An underscore in body-text.
Underscore,
/// A backslash followed by whitespace in text.
Backslash,
/// A hashtag token in the body can indicate compute mode or headings.
Hashtag,
/// A unicode escape sequence.
UnicodeEscape {
/// The escape sequence between two braces.
sequence: &'s str,
/// Whether the closing brace was present.
terminated: bool,
},
/// Raw text.
Raw {
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backtick was present.
terminated: bool,
},
/// Multi-line code block.
Code {
/// The language of the code block, if specified.
lang: Option<Spanned<&'s str>>,
/// The raw text (not yet unescaped as for strings).
raw: &'s str,
/// Whether the closing backticks were present.
terminated: bool,
},
/// Any other consecutive string.
Text(&'s str),
/// Things that are not valid in the context they appeared in.
Invalid(&'s str),
}
impl<'s> Token<'s> {
/// The natural-language name for this token for use in error messages.
pub fn name(self) -> &'static str {
match self {
Self::Space(_) => "space",
Self::LineComment(_) => "line comment",
Self::BlockComment(_) => "block comment",
Self::LeftBracket => "opening bracket",
Self::RightBracket => "closing bracket",
Self::LeftParen => "opening paren",
Self::RightParen => "closing paren",
Self::LeftBrace => "opening brace",
Self::RightBrace => "closing brace",
Self::Chain => "function chain operator",
Self::Colon => "colon",
Self::Comma => "comma",
Self::Equals => "equals sign",
Self::Ident(_) => "identifier",
Self::Str { .. } => "string",
Self::Bool(_) => "bool",
Self::Number(_) => "number",
Self::Length(_) => "length",
Self::Hex(_) => "hex value",
Self::Plus => "plus",
Self::Hyphen => "minus",
Self::Slash => "slash",
Self::Star => "star",
Self::Underscore => "underscore",
Self::Backslash => "backslash",
Self::Hashtag => "hashtag",
Self::UnicodeEscape { .. } => "unicode escape sequence",
Self::Raw { .. } => "raw text",
Self::Code { .. } => "code block",
Self::Text(_) => "text",
Self::Invalid("*/") => "end of block comment",
Self::Invalid(_) => "invalid token",
}
}
}

View File

@ -4,12 +4,12 @@ use std::fmt::{self, Debug, Formatter};
use super::decoration::Decoration; use super::decoration::Decoration;
use super::span::{SpanVec, Spanned}; use super::span::{SpanVec, Spanned};
use super::tokens::is_identifier;
use crate::color::RgbaColor; use crate::color::RgbaColor;
use crate::compute::table::{SpannedEntry, Table}; use crate::compute::table::{SpannedEntry, Table};
use crate::compute::value::{TableValue, Value}; use crate::compute::value::{TableValue, Value};
use crate::layout::LayoutContext; use crate::layout::LayoutContext;
use crate::length::Length; use crate::length::Length;
use crate::parse::is_identifier;
use crate::{DynFuture, Feedback}; use crate::{DynFuture, Feedback};
/// A collection of nodes which form a tree together with the nodes' children. /// A collection of nodes which form a tree together with the nodes' children.