mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Create test runner which renders layouts to images 🗺
This commit is contained in:
parent
8f788f9a4f
commit
c0e4fd55e6
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@
|
|||||||
**/*.rs.bk
|
**/*.rs.bk
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
things
|
things
|
||||||
|
test-cache
|
||||||
|
13
src/doc.rs
13
src/doc.rs
@ -1,5 +1,6 @@
|
|||||||
//! Representation of typesetted documents.
|
//! Representation of typesetted documents.
|
||||||
|
|
||||||
|
use std::io::{self, Write};
|
||||||
use crate::size::{Size, Size2D};
|
use crate::size::{Size, Size2D};
|
||||||
|
|
||||||
|
|
||||||
@ -31,3 +32,15 @@ pub enum LayoutAction {
|
|||||||
/// Write text starting at the current position.
|
/// Write text starting at the current position.
|
||||||
WriteText(String),
|
WriteText(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl LayoutAction {
|
||||||
|
/// Serialize this layout action into a string representation.
|
||||||
|
pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||||
|
use LayoutAction::*;
|
||||||
|
match self {
|
||||||
|
MoveAbsolute(s) => write!(f, "m {:.4} {:.4}", s.x.to_pt(), s.y.to_pt()),
|
||||||
|
SetFont(i, s) => write!(f, "f {} {}", i, s),
|
||||||
|
WriteText(s) => write!(f, "w {}", s),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
//! Block-style layouting of boxes.
|
//! Block-style layouting of boxes.
|
||||||
|
|
||||||
|
use std::io::{self, Write};
|
||||||
use crate::doc::{Document, Page, LayoutAction};
|
use crate::doc::{Document, Page, LayoutAction};
|
||||||
use crate::size::{Size, Size2D};
|
use crate::size::{Size, Size2D};
|
||||||
use super::{ActionList, LayoutSpace, Alignment, LayoutResult, LayoutError};
|
use super::{ActionList, LayoutSpace, Alignment, LayoutResult, LayoutError};
|
||||||
@ -25,6 +26,16 @@ impl BoxLayout {
|
|||||||
}],
|
}],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Serialize this layout into a string representation.
|
||||||
|
pub fn serialize<W: Write>(&self, f: &mut W) -> io::Result<()> {
|
||||||
|
writeln!(f, "{:.4} {:.4}", self.dimensions.x.to_pt(), self.dimensions.y.to_pt())?;
|
||||||
|
for action in &self.actions {
|
||||||
|
action.serialize(f)?;
|
||||||
|
writeln!(f)?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The context for layouting boxes.
|
/// The context for layouting boxes.
|
||||||
|
@ -157,6 +157,9 @@ impl FlexFinisher {
|
|||||||
|
|
||||||
/// Layout the glue.
|
/// Layout the glue.
|
||||||
fn glue(&mut self, glue: BoxLayout) {
|
fn glue(&mut self, glue: BoxLayout) {
|
||||||
|
if let Some(glue) = self.glue.take() {
|
||||||
|
self.append(glue);
|
||||||
|
}
|
||||||
self.glue = Some(glue);
|
self.glue = Some(glue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
53
src/lib.rs
53
src/lib.rs
@ -136,56 +136,3 @@ error_type! {
|
|||||||
from: (ParseError, TypesetError::Parse(err)),
|
from: (ParseError, TypesetError::Parse(err)),
|
||||||
from: (LayoutError, TypesetError::Layout(err)),
|
from: (LayoutError, TypesetError::Layout(err)),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test {
|
|
||||||
use std::fs::File;
|
|
||||||
use std::io::BufWriter;
|
|
||||||
use crate::Typesetter;
|
|
||||||
use crate::export::pdf::PdfExporter;
|
|
||||||
use toddle::query::FileSystemFontProvider;
|
|
||||||
|
|
||||||
/// Create a _PDF_ with a name from the source code.
|
|
||||||
fn test(name: &str, src: &str) {
|
|
||||||
let mut typesetter = Typesetter::new();
|
|
||||||
let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
|
|
||||||
typesetter.add_font_provider(provider);
|
|
||||||
|
|
||||||
// Typeset into document.
|
|
||||||
let document = typesetter.typeset(src).unwrap();
|
|
||||||
|
|
||||||
// Write to file.
|
|
||||||
let path = format!("../target/typeset-unit-{}.pdf", name);
|
|
||||||
let file = BufWriter::new(File::create(path).unwrap());
|
|
||||||
let exporter = PdfExporter::new();
|
|
||||||
exporter.export(&document, typesetter.loader(), file).unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn features() {
|
|
||||||
test("features", r"
|
|
||||||
*Features Test Page*
|
|
||||||
|
|
||||||
_Multiline:_
|
|
||||||
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
|
|
||||||
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
|
|
||||||
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
|
|
||||||
clita kasd gubergren, no sea takimata sanctus est.
|
|
||||||
|
|
||||||
_Emoji:_ Hello World! 🌍
|
|
||||||
|
|
||||||
_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
|
|
||||||
built-in syntax!
|
|
||||||
|
|
||||||
_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
|
|
||||||
using the standard library functions [mono][bold] and `italic`!
|
|
||||||
");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn shakespeare() {
|
|
||||||
test("shakespeare", include_str!("../test/shakespeare.tps"));
|
|
||||||
test("shakespeare-right", &format!("[align:right][{}]", include_str!("../test/shakespeare.tps")));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,346 +1,15 @@
|
|||||||
//! Tokenization and parsing of source code into syntax trees.
|
//! Parsing of source code into token streams and syntax trees.
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::str::CharIndices;
|
|
||||||
|
|
||||||
use smallvec::SmallVec;
|
|
||||||
use unicode_xid::UnicodeXID;
|
use unicode_xid::UnicodeXID;
|
||||||
|
|
||||||
use crate::func::{Function, Scope};
|
use crate::func::{Function, Scope};
|
||||||
use crate::syntax::*;
|
use crate::syntax::*;
|
||||||
use crate::size::Size;
|
use crate::size::Size;
|
||||||
|
|
||||||
|
mod tokens;
|
||||||
|
pub use tokens::{tokenize, Tokens};
|
||||||
|
|
||||||
/// Builds an iterator over the tokens of the source code.
|
|
||||||
#[inline]
|
|
||||||
pub fn tokenize(src: &str) -> Tokens {
|
|
||||||
Tokens::new(src)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An iterator over the tokens of source code.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct Tokens<'s> {
|
|
||||||
src: &'s str,
|
|
||||||
chars: PeekableChars<'s>,
|
|
||||||
state: TokensState,
|
|
||||||
stack: SmallVec<[TokensState; 1]>,
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The state the tokenizer is in.
|
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
|
||||||
enum TokensState {
|
|
||||||
/// The base state if there is nothing special we are in.
|
|
||||||
Body,
|
|
||||||
/// Inside a function header. Here colons and equal signs get parsed
|
|
||||||
/// as distinct tokens rather than text.
|
|
||||||
Function,
|
|
||||||
/// We expect either the end of the function or the beginning of the body.
|
|
||||||
MaybeBody,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> Tokens<'s> {
|
|
||||||
/// Create a new token stream from source code.
|
|
||||||
fn new(src: &'s str) -> Tokens<'s> {
|
|
||||||
Tokens {
|
|
||||||
src,
|
|
||||||
chars: PeekableChars::new(src),
|
|
||||||
state: TokensState::Body,
|
|
||||||
stack: SmallVec::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance the iterator by one step.
|
|
||||||
fn advance(&mut self) {
|
|
||||||
self.chars.next();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Switch to the given state.
|
|
||||||
fn switch(&mut self, state: TokensState) {
|
|
||||||
self.stack.push(self.state);
|
|
||||||
self.state = state;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Go back to the top-of-stack state.
|
|
||||||
fn unswitch(&mut self) {
|
|
||||||
self.state = self.stack.pop().unwrap_or(TokensState::Body);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Advance and return the given token.
|
|
||||||
fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
|
|
||||||
self.advance();
|
|
||||||
token
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns a word containing the string bounded by the given indices.
|
|
||||||
fn text(&self, start: usize, end: usize) -> Token<'s> {
|
|
||||||
Token::Text(&self.src[start .. end])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> Iterator for Tokens<'s> {
|
|
||||||
type Item = Token<'s>;
|
|
||||||
|
|
||||||
/// Advance the iterator, return the next token or nothing.
|
|
||||||
fn next(&mut self) -> Option<Token<'s>> {
|
|
||||||
use TokensState as TU;
|
|
||||||
|
|
||||||
// Go to the body state if the function has a body or return to the top-of-stack state.
|
|
||||||
if self.state == TU::MaybeBody {
|
|
||||||
if self.chars.peek()?.1 == '[' {
|
|
||||||
self.state = TU::Body;
|
|
||||||
return Some(self.consumed(Token::LeftBracket));
|
|
||||||
} else {
|
|
||||||
self.unswitch();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Take the next char and peek at the one behind.
|
|
||||||
let (next_pos, next) = self.chars.next()?;
|
|
||||||
let afterwards = self.chars.peek().map(|p| p.1);
|
|
||||||
|
|
||||||
Some(match next {
|
|
||||||
// Functions
|
|
||||||
'[' => {
|
|
||||||
self.switch(TU::Function);
|
|
||||||
Token::LeftBracket
|
|
||||||
},
|
|
||||||
']' => {
|
|
||||||
if self.state == TU::Function {
|
|
||||||
self.state = TU::MaybeBody;
|
|
||||||
} else {
|
|
||||||
self.unswitch();
|
|
||||||
}
|
|
||||||
Token::RightBracket
|
|
||||||
},
|
|
||||||
|
|
||||||
// Line comment
|
|
||||||
'/' if afterwards == Some('/') => {
|
|
||||||
let mut end = self.chars.next().unwrap();
|
|
||||||
let start = end.0 + end.1.len_utf8();
|
|
||||||
|
|
||||||
while let Some((index, c)) = self.chars.peek() {
|
|
||||||
if is_newline_char(c) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
self.advance();
|
|
||||||
end = (index, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = end.0 + end.1.len_utf8();
|
|
||||||
Token::LineComment(&self.src[start .. end])
|
|
||||||
},
|
|
||||||
|
|
||||||
// Block comment
|
|
||||||
'/' if afterwards == Some('*') => {
|
|
||||||
let mut end = self.chars.next().unwrap();
|
|
||||||
let start = end.0 + end.1.len_utf8();
|
|
||||||
|
|
||||||
let mut nested = 0;
|
|
||||||
while let Some((index, c)) = self.chars.next() {
|
|
||||||
let after = self.chars.peek().map(|p| p.1);
|
|
||||||
match (c, after) {
|
|
||||||
('*', Some('/')) if nested == 0 => { self.advance(); break },
|
|
||||||
('/', Some('*')) => { self.advance(); nested += 1 },
|
|
||||||
('*', Some('/')) => { self.advance(); nested -= 1 },
|
|
||||||
_ => {},
|
|
||||||
}
|
|
||||||
end = (index, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = end.0 + end.1.len_utf8();
|
|
||||||
Token::BlockComment(&self.src[start .. end])
|
|
||||||
},
|
|
||||||
|
|
||||||
// Unexpected end of block comment
|
|
||||||
'*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
|
|
||||||
|
|
||||||
// Whitespace
|
|
||||||
' ' | '\t' => {
|
|
||||||
while let Some((_, c)) = self.chars.peek() {
|
|
||||||
match c {
|
|
||||||
' ' | '\t' => self.advance(),
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Token::Space
|
|
||||||
}
|
|
||||||
|
|
||||||
// Newlines
|
|
||||||
'\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
|
|
||||||
c if is_newline_char(c) => Token::Newline,
|
|
||||||
|
|
||||||
// Star/Underscore/Backtick in bodies
|
|
||||||
'*' if self.state == TU::Body => Token::Star,
|
|
||||||
'_' if self.state == TU::Body => Token::Underscore,
|
|
||||||
'`' if self.state == TU::Body => Token::Backtick,
|
|
||||||
|
|
||||||
// Context sensitive operators in headers
|
|
||||||
':' if self.state == TU::Function => Token::Colon,
|
|
||||||
'=' if self.state == TU::Function => Token::Equals,
|
|
||||||
',' if self.state == TU::Function => Token::Comma,
|
|
||||||
|
|
||||||
// A string value.
|
|
||||||
'"' if self.state == TU::Function => {
|
|
||||||
// Find out when the word ends.
|
|
||||||
let mut escaped = false;
|
|
||||||
let mut end = (next_pos, next);
|
|
||||||
|
|
||||||
while let Some((index, c)) = self.chars.next() {
|
|
||||||
if c == '"' && !escaped {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
escaped = c == '\\';
|
|
||||||
end = (index, c);
|
|
||||||
}
|
|
||||||
|
|
||||||
let end_pos = end.0 + end.1.len_utf8();
|
|
||||||
Token::Quoted(&self.src[next_pos + 1 .. end_pos])
|
|
||||||
}
|
|
||||||
|
|
||||||
// Escaping
|
|
||||||
'\\' => {
|
|
||||||
if let Some((index, c)) = self.chars.peek() {
|
|
||||||
let escapable = match c {
|
|
||||||
'[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
|
|
||||||
_ => false,
|
|
||||||
};
|
|
||||||
|
|
||||||
if escapable {
|
|
||||||
self.advance();
|
|
||||||
return Some(self.text(index, index + c.len_utf8()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Token::Text("\\")
|
|
||||||
},
|
|
||||||
|
|
||||||
// Normal text
|
|
||||||
_ => {
|
|
||||||
// Find out when the word ends.
|
|
||||||
let mut end = (next_pos, next);
|
|
||||||
while let Some((index, c)) = self.chars.peek() {
|
|
||||||
let second = self.chars.peek_second().map(|p| p.1);
|
|
||||||
|
|
||||||
// Whether the next token is still from the text or not.
|
|
||||||
let continues = match c {
|
|
||||||
'[' | ']' | '\\' => false,
|
|
||||||
'*' | '_' | '`' if self.state == TU::Body => false,
|
|
||||||
':' | '=' | ',' | '"' if self.state == TU::Function => false,
|
|
||||||
|
|
||||||
'/' => second != Some('/') && second != Some('*'),
|
|
||||||
'*' => second != Some('/'),
|
|
||||||
|
|
||||||
' ' | '\t' => false,
|
|
||||||
c if is_newline_char(c) => false,
|
|
||||||
|
|
||||||
_ => true,
|
|
||||||
};
|
|
||||||
|
|
||||||
if !continues {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
end = (index, c);
|
|
||||||
self.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
let end_pos = end.0 + end.1.len_utf8();
|
|
||||||
self.text(next_pos, end_pos)
|
|
||||||
},
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Whether this character is a newline (or starts one).
|
|
||||||
fn is_newline_char(character: char) -> bool {
|
|
||||||
match character {
|
|
||||||
'\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A (index, char) iterator with double lookahead.
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct PeekableChars<'s> {
|
|
||||||
offset: usize,
|
|
||||||
string: &'s str,
|
|
||||||
chars: CharIndices<'s>,
|
|
||||||
peek1: Option<Option<(usize, char)>>,
|
|
||||||
peek2: Option<Option<(usize, char)>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> PeekableChars<'s> {
|
|
||||||
/// Create a new iterator from a string.
|
|
||||||
fn new(string: &'s str) -> PeekableChars<'s> {
|
|
||||||
PeekableChars {
|
|
||||||
offset: 0,
|
|
||||||
string,
|
|
||||||
chars: string.char_indices(),
|
|
||||||
peek1: None,
|
|
||||||
peek2: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the next element.
|
|
||||||
fn peek(&mut self) -> Option<(usize, char)> {
|
|
||||||
match self.peek1 {
|
|
||||||
Some(peeked) => peeked,
|
|
||||||
None => {
|
|
||||||
let next = self.next_inner();
|
|
||||||
self.peek1 = Some(next);
|
|
||||||
next
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Peek at the element after the next element.
|
|
||||||
fn peek_second(&mut self) -> Option<(usize, char)> {
|
|
||||||
match self.peek2 {
|
|
||||||
Some(peeked) => peeked,
|
|
||||||
None => {
|
|
||||||
self.peek();
|
|
||||||
let next = self.next_inner();
|
|
||||||
self.peek2 = Some(next);
|
|
||||||
next
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the next value of the inner iterator mapped with the offset.
|
|
||||||
fn next_inner(&mut self) -> Option<(usize, char)> {
|
|
||||||
self.chars.next().map(|(i, c)| (i + self.offset, c))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The index of the first character of the next token in the source string.
|
|
||||||
fn current_index(&mut self) -> Option<usize> {
|
|
||||||
self.peek().map(|p| p.0)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Go to a new position in the underlying string.
|
|
||||||
fn goto(&mut self, index: usize) {
|
|
||||||
self.offset = index;
|
|
||||||
self.chars = self.string[index..].char_indices();
|
|
||||||
self.peek1 = None;
|
|
||||||
self.peek2 = None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Iterator for PeekableChars<'_> {
|
|
||||||
type Item = (usize, char);
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<(usize, char)> {
|
|
||||||
match self.peek1.take() {
|
|
||||||
Some(value) => {
|
|
||||||
self.peek1 = self.peek2.take();
|
|
||||||
value
|
|
||||||
},
|
|
||||||
None => self.next_inner(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------------//
|
|
||||||
|
|
||||||
/// Parses source code into a syntax tree given a context.
|
/// Parses source code into a syntax tree given a context.
|
||||||
#[inline]
|
#[inline]
|
||||||
@ -740,7 +409,6 @@ fn is_identifier(string: &str) -> bool {
|
|||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------------------//
|
|
||||||
|
|
||||||
/// The error type for parsing.
|
/// The error type for parsing.
|
||||||
pub struct ParseError(String);
|
pub struct ParseError(String);
|
||||||
@ -762,137 +430,7 @@ error_type! {
|
|||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod token_tests {
|
mod tests {
|
||||||
use super::*;
|
|
||||||
use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
|
|
||||||
Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
|
|
||||||
Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
|
|
||||||
StarSlash as SS};
|
|
||||||
|
|
||||||
/// Test if the source code tokenizes to the tokens.
|
|
||||||
fn test(src: &str, tokens: Vec<Token>) {
|
|
||||||
assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tokenizes the basic building blocks.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_base() {
|
|
||||||
test("", vec![]);
|
|
||||||
test("Hallo", vec![T("Hallo")]);
|
|
||||||
test("[", vec![L]);
|
|
||||||
test("]", vec![R]);
|
|
||||||
test("*", vec![TS]);
|
|
||||||
test("_", vec![TU]);
|
|
||||||
test("`", vec![TB]);
|
|
||||||
test("\n", vec![N]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This test looks if LF- and CRLF-style newlines get both identified correctly.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_whitespace_newlines() {
|
|
||||||
test(" \t", vec![S]);
|
|
||||||
test("First line\r\nSecond line\nThird line\n",
|
|
||||||
vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
|
|
||||||
T("Third"), S, T("line"), N]);
|
|
||||||
test("Hello \n ", vec![T("Hello"), S, N, S]);
|
|
||||||
test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tests if escaping with backslash works as it should.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_escape() {
|
|
||||||
test(r"\[", vec![T("[")]);
|
|
||||||
test(r"\]", vec![T("]")]);
|
|
||||||
test(r"\**", vec![T("*"), TS]);
|
|
||||||
test(r"\*", vec![T("*")]);
|
|
||||||
test(r"\__", vec![T("_"), TU]);
|
|
||||||
test(r"\_", vec![T("_")]);
|
|
||||||
test(r"\hello", vec![T("\\"), T("hello")]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tests if escaped strings work.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_quoted() {
|
|
||||||
test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Tokenizes some more realistic examples.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_examples() {
|
|
||||||
test(r"
|
|
||||||
[function][
|
|
||||||
Test [italic][example]!
|
|
||||||
]
|
|
||||||
", vec![
|
|
||||||
N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
|
|
||||||
T("example"), R, T("!"), N, S, R, N, S
|
|
||||||
]);
|
|
||||||
|
|
||||||
test(r"
|
|
||||||
[page: size=A4]
|
|
||||||
[font: size=12pt]
|
|
||||||
|
|
||||||
Das ist ein Beispielsatz mit *fetter* Schrift.
|
|
||||||
", vec![
|
|
||||||
N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
|
|
||||||
L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
|
|
||||||
T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
|
|
||||||
TS, T("fetter"), TS, S, T("Schrift."), N, S
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This test checks whether the colon and equals symbols get parsed correctly depending on the
|
|
||||||
/// context: Either in a function header or in a body.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_symbols_context() {
|
|
||||||
test("[func: key=value][Answer: 7]",
|
|
||||||
vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
|
|
||||||
T("Answer:"), S, T("7"), R]);
|
|
||||||
test("[[n: k=v]:x][:[=]]:=",
|
|
||||||
vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
|
|
||||||
L, T(":"), L, E, R, R, T(":=")]);
|
|
||||||
test("[hi: k=[func][body] v=1][hello]",
|
|
||||||
vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
|
|
||||||
T("v"), E, T("1"), R, L, T("hello"), R]);
|
|
||||||
test("[func: __key__=value]",
|
|
||||||
vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
|
|
||||||
test("The /*[*/ answer: 7.",
|
|
||||||
vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Test if block and line comments get tokenized as expected.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_comments() {
|
|
||||||
test("These // Line comments.",
|
|
||||||
vec![T("These"), S, LC(" Line comments.")]);
|
|
||||||
test("This /* is */ a comment.",
|
|
||||||
vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
|
|
||||||
test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
|
|
||||||
test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
|
|
||||||
test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
|
|
||||||
test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This test has a special look at the underscore syntax.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_underscores() {
|
|
||||||
test("he_llo_world_ __ Now this_ is_ special!",
|
|
||||||
vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
|
|
||||||
T("this"), TU, S, T("is"), TU, S, T("special!")]);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This test is for checking if non-ASCII characters get parsed correctly.
|
|
||||||
#[test]
|
|
||||||
fn tokenize_unicode() {
|
|
||||||
test("[document][Hello 🌍!]",
|
|
||||||
vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
|
|
||||||
test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod parse_tests {
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::func::{Function, Scope};
|
use crate::func::{Function, Scope};
|
||||||
use crate::layout::{LayoutContext, LayoutResult, Layout};
|
use crate::layout::{LayoutContext, LayoutResult, Layout};
|
465
src/parsing/tokens.rs
Normal file
465
src/parsing/tokens.rs
Normal file
@ -0,0 +1,465 @@
|
|||||||
|
//! Tokenization of text.
|
||||||
|
|
||||||
|
use std::str::CharIndices;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
use crate::syntax::*;
|
||||||
|
|
||||||
|
|
||||||
|
/// Builds an iterator over the tokens of the source code.
|
||||||
|
#[inline]
|
||||||
|
pub fn tokenize(src: &str) -> Tokens {
|
||||||
|
Tokens::new(src)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the tokens of source code.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Tokens<'s> {
|
||||||
|
src: &'s str,
|
||||||
|
pub(in super) chars: PeekableChars<'s>,
|
||||||
|
state: TokensState,
|
||||||
|
stack: SmallVec<[TokensState; 1]>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The context the tokenizer is in, which decides how some characters are
/// classified.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum TokensState {
    /// Ordinary body text. Stars, underscores and backticks are separate
    /// tokens here.
    Body,
    /// Inside a function header. Colons, equals signs, commas and quoted
    /// strings are separate tokens here instead of plain text.
    Function,
    /// Right after a function header was closed: either a body follows
    /// (opened by a left bracket) or the function is over.
    MaybeBody,
}
|
||||||
|
|
||||||
|
impl<'s> Tokens<'s> {
|
||||||
|
/// Create a new token stream from source code.
|
||||||
|
fn new(src: &'s str) -> Tokens<'s> {
|
||||||
|
Tokens {
|
||||||
|
src,
|
||||||
|
chars: PeekableChars::new(src),
|
||||||
|
state: TokensState::Body,
|
||||||
|
stack: SmallVec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advance the iterator by one step.
|
||||||
|
fn advance(&mut self) {
|
||||||
|
self.chars.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Switch to the given state.
|
||||||
|
fn switch(&mut self, state: TokensState) {
|
||||||
|
self.stack.push(self.state);
|
||||||
|
self.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Go back to the top-of-stack state.
|
||||||
|
fn unswitch(&mut self) {
|
||||||
|
self.state = self.stack.pop().unwrap_or(TokensState::Body);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advance and return the given token.
|
||||||
|
fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
|
||||||
|
self.advance();
|
||||||
|
token
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a word containing the string bounded by the given indices.
|
||||||
|
fn text(&self, start: usize, end: usize) -> Token<'s> {
|
||||||
|
Token::Text(&self.src[start .. end])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for Tokens<'s> {
|
||||||
|
type Item = Token<'s>;
|
||||||
|
|
||||||
|
/// Advance the iterator, return the next token or nothing.
|
||||||
|
fn next(&mut self) -> Option<Token<'s>> {
|
||||||
|
use TokensState as TU;
|
||||||
|
|
||||||
|
// Go to the body state if the function has a body or return to the top-of-stack state.
|
||||||
|
if self.state == TU::MaybeBody {
|
||||||
|
if self.chars.peek()?.1 == '[' {
|
||||||
|
self.state = TU::Body;
|
||||||
|
return Some(self.consumed(Token::LeftBracket));
|
||||||
|
} else {
|
||||||
|
self.unswitch();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Take the next char and peek at the one behind.
|
||||||
|
let (next_pos, next) = self.chars.next()?;
|
||||||
|
let afterwards = self.chars.peek().map(|p| p.1);
|
||||||
|
|
||||||
|
Some(match next {
|
||||||
|
// Functions
|
||||||
|
'[' => {
|
||||||
|
self.switch(TU::Function);
|
||||||
|
Token::LeftBracket
|
||||||
|
},
|
||||||
|
']' => {
|
||||||
|
if self.state == TU::Function {
|
||||||
|
self.state = TU::MaybeBody;
|
||||||
|
} else {
|
||||||
|
self.unswitch();
|
||||||
|
}
|
||||||
|
Token::RightBracket
|
||||||
|
},
|
||||||
|
|
||||||
|
// Line comment
|
||||||
|
'/' if afterwards == Some('/') => {
|
||||||
|
let mut end = self.chars.next().unwrap();
|
||||||
|
let start = end.0 + end.1.len_utf8();
|
||||||
|
|
||||||
|
while let Some((index, c)) = self.chars.peek() {
|
||||||
|
if is_newline_char(c) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.advance();
|
||||||
|
end = (index, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = end.0 + end.1.len_utf8();
|
||||||
|
Token::LineComment(&self.src[start .. end])
|
||||||
|
},
|
||||||
|
|
||||||
|
// Block comment
|
||||||
|
'/' if afterwards == Some('*') => {
|
||||||
|
let mut end = self.chars.next().unwrap();
|
||||||
|
let start = end.0 + end.1.len_utf8();
|
||||||
|
|
||||||
|
let mut nested = 0;
|
||||||
|
while let Some((index, c)) = self.chars.next() {
|
||||||
|
let after = self.chars.peek().map(|p| p.1);
|
||||||
|
match (c, after) {
|
||||||
|
('*', Some('/')) if nested == 0 => { self.advance(); break },
|
||||||
|
('/', Some('*')) => { self.advance(); nested += 1 },
|
||||||
|
('*', Some('/')) => { self.advance(); nested -= 1 },
|
||||||
|
_ => {},
|
||||||
|
}
|
||||||
|
end = (index, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let end = end.0 + end.1.len_utf8();
|
||||||
|
Token::BlockComment(&self.src[start .. end])
|
||||||
|
},
|
||||||
|
|
||||||
|
// Unexpected end of block comment
|
||||||
|
'*' if afterwards == Some('/') => self.consumed(Token::StarSlash),
|
||||||
|
|
||||||
|
// Whitespace
|
||||||
|
' ' | '\t' => {
|
||||||
|
while let Some((_, c)) = self.chars.peek() {
|
||||||
|
match c {
|
||||||
|
' ' | '\t' => self.advance(),
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Token::Space
|
||||||
|
}
|
||||||
|
|
||||||
|
// Newlines
|
||||||
|
'\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
|
||||||
|
c if is_newline_char(c) => Token::Newline,
|
||||||
|
|
||||||
|
// Star/Underscore/Backtick in bodies
|
||||||
|
'*' if self.state == TU::Body => Token::Star,
|
||||||
|
'_' if self.state == TU::Body => Token::Underscore,
|
||||||
|
'`' if self.state == TU::Body => Token::Backtick,
|
||||||
|
|
||||||
|
// Context sensitive operators in headers
|
||||||
|
':' if self.state == TU::Function => Token::Colon,
|
||||||
|
'=' if self.state == TU::Function => Token::Equals,
|
||||||
|
',' if self.state == TU::Function => Token::Comma,
|
||||||
|
|
||||||
|
// A string value.
|
||||||
|
'"' if self.state == TU::Function => {
|
||||||
|
// Find out when the word ends.
|
||||||
|
let mut escaped = false;
|
||||||
|
let mut end = (next_pos, next);
|
||||||
|
|
||||||
|
while let Some((index, c)) = self.chars.next() {
|
||||||
|
if c == '"' && !escaped {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
escaped = c == '\\';
|
||||||
|
end = (index, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
let end_pos = end.0 + end.1.len_utf8();
|
||||||
|
Token::Quoted(&self.src[next_pos + 1 .. end_pos])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Escaping
|
||||||
|
'\\' => {
|
||||||
|
if let Some((index, c)) = self.chars.peek() {
|
||||||
|
let escapable = match c {
|
||||||
|
'[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | '/' => true,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
|
||||||
|
if escapable {
|
||||||
|
self.advance();
|
||||||
|
return Some(self.text(index, index + c.len_utf8()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Token::Text("\\")
|
||||||
|
},
|
||||||
|
|
||||||
|
// Normal text
|
||||||
|
_ => {
|
||||||
|
// Find out when the word ends.
|
||||||
|
let mut end = (next_pos, next);
|
||||||
|
while let Some((index, c)) = self.chars.peek() {
|
||||||
|
let second = self.chars.peek_second().map(|p| p.1);
|
||||||
|
|
||||||
|
// Whether the next token is still from the text or not.
|
||||||
|
let continues = match c {
|
||||||
|
'[' | ']' | '\\' => false,
|
||||||
|
'*' | '_' | '`' if self.state == TU::Body => false,
|
||||||
|
':' | '=' | ',' | '"' if self.state == TU::Function => false,
|
||||||
|
|
||||||
|
'/' => second != Some('/') && second != Some('*'),
|
||||||
|
'*' => second != Some('/'),
|
||||||
|
|
||||||
|
' ' | '\t' => false,
|
||||||
|
c if is_newline_char(c) => false,
|
||||||
|
|
||||||
|
_ => true,
|
||||||
|
};
|
||||||
|
|
||||||
|
if !continues {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
end = (index, c);
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
let end_pos = end.0 + end.1.len_utf8();
|
||||||
|
self.text(next_pos, end_pos)
|
||||||
|
},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether this character is a newline (or starts one).
///
/// Recognized are line feed, vertical tab, form feed, carriage return,
/// next line (U+0085), line separator (U+2028) and paragraph separator
/// (U+2029). A carriage return counts on its own; pairing it with a
/// following line feed is left to the caller.
fn is_newline_char(character: char) -> bool {
    match character {
        '\n' | '\u{000b}' | '\u{000c}' | '\r' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
|
||||||
|
|
||||||
|
/// An iterator over `(byte index, char)` pairs of a string that can look up
/// to two characters ahead without consuming them.
#[derive(Debug, Clone)]
pub struct PeekableChars<'s> {
    /// Byte position in `string` at which `chars` was started; added to the
    /// indices `chars` yields so that they refer to the whole string.
    offset: usize,
    /// The complete underlying string.
    string: &'s str,
    /// The inner iterator over the part of `string` starting at `offset`.
    chars: CharIndices<'s>,
    /// Cached first lookahead. The outer `None` means "not fetched yet", an
    /// inner `None` means the inner iterator was exhausted.
    peek1: Option<Option<(usize, char)>>,
    /// Cached second lookahead, with the same encoding as `peek1`.
    peek2: Option<Option<(usize, char)>>,
}

impl<'s> PeekableChars<'s> {
    /// Start iterating at the beginning of `string` with empty lookahead.
    pub fn new(string: &'s str) -> PeekableChars<'s> {
        PeekableChars {
            offset: 0,
            string,
            chars: string.char_indices(),
            peek1: None,
            peek2: None,
        }
    }

    /// Look at the next element without consuming it.
    pub fn peek(&mut self) -> Option<(usize, char)> {
        if let Some(cached) = self.peek1 {
            return cached;
        }

        let fetched = self.next_inner();
        self.peek1 = Some(fetched);
        fetched
    }

    /// Look at the element after the next one without consuming anything.
    pub fn peek_second(&mut self) -> Option<(usize, char)> {
        if let Some(cached) = self.peek2 {
            return cached;
        }

        // The first lookahead slot has to be filled before the inner
        // iterator is advanced for the second one.
        self.peek();
        let fetched = self.next_inner();
        self.peek2 = Some(fetched);
        fetched
    }

    /// Pull the next element straight from the inner iterator, shifting its
    /// index by the current offset. This bypasses the lookahead slots.
    pub fn next_inner(&mut self) -> Option<(usize, char)> {
        let (relative, character) = self.chars.next()?;
        Some((relative + self.offset, character))
    }

    /// The byte index of the next element in the underlying string, or
    /// `None` if there is no next element.
    pub fn current_index(&mut self) -> Option<usize> {
        let (index, _) = self.peek()?;
        Some(index)
    }

    /// Continue iteration at byte position `index` of the underlying string
    /// and discard any cached lookahead.
    ///
    /// Slicing panics if `index` is out of bounds or not on a character
    /// boundary.
    pub fn goto(&mut self, index: usize) {
        let whole: &'s str = self.string;
        self.offset = index;
        self.chars = whole[index..].char_indices();
        self.peek1 = None;
        self.peek2 = None;
    }
}
|
||||||
|
|
||||||
|
impl Iterator for PeekableChars<'_> {
|
||||||
|
type Item = (usize, char);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<(usize, char)> {
|
||||||
|
match self.peek1.take() {
|
||||||
|
Some(value) => {
|
||||||
|
self.peek1 = self.peek2.take();
|
||||||
|
value
|
||||||
|
},
|
||||||
|
None => self.next_inner(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    // Short aliases for the token variants keep the expected token lists below
    // compact enough to read at a glance.
    use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
                Colon as C, Equals as E, Quoted as Q, Underscore as TU, Star as TS,
                Backtick as TB, Text as T, LineComment as LC, BlockComment as BC,
                StarSlash as SS};

    /// Test if the source code tokenizes to the tokens.
    fn test(src: &str, tokens: Vec<Token>) {
        assert_eq!(Tokens::new(src).collect::<Vec<_>>(), tokens);
    }

    /// Tokenizes the basic building blocks.
    #[test]
    fn tokenize_base() {
        test("", vec![]);
        test("Hallo", vec![T("Hallo")]);
        test("[", vec![L]);
        test("]", vec![R]);
        test("*", vec![TS]);
        test("_", vec![TU]);
        test("`", vec![TB]);
        test("\n", vec![N]);
    }

    /// This test looks if LF- and CRLF-style newlines get both identified correctly.
    #[test]
    fn tokenize_whitespace_newlines() {
        // A run of spaces and tabs is expected to collapse into one space token.
        test(" \t", vec![S]);
        test("First line\r\nSecond line\nThird line\n",
             vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
                  T("Third"), S, T("line"), N]);
        test("Hello \n ", vec![T("Hello"), S, N, S]);
        test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
    }

    /// Tests if escaping with backslash works as it should.
    #[test]
    fn tokenize_escape() {
        test(r"\[", vec![T("[")]);
        test(r"\]", vec![T("]")]);
        test(r"\**", vec![T("*"), TS]);
        test(r"\*", vec![T("*")]);
        test(r"\__", vec![T("_"), TU]);
        test(r"\_", vec![T("_")]);
        // A backslash in front of a character that cannot be escaped is
        // expected to come out as its own text token.
        test(r"\hello", vec![T("\\"), T("hello")]);
    }

    /// Tests if escaped strings work.
    #[test]
    fn tokenize_quoted() {
        test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
    }

    /// Tokenizes some more realistic examples.
    #[test]
    fn tokenize_examples() {
        // The indentation inside the raw strings matters: every indented line
        // contributes one space token after the newline token.
        test(r"
            [function][
                Test [italic][example]!
            ]
        ", vec![
            N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
            T("example"), R, T("!"), N, S, R, N, S
        ]);

        // The empty line in the middle yields two consecutive newline tokens.
        test(r"
            [page: size=A4]
            [font: size=12pt]

            Das ist ein Beispielsatz mit *fetter* Schrift.
        ", vec![
            N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
            L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
            T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
            TS, T("fetter"), TS, S, T("Schrift."), N, S
        ]);
    }

    /// This test checks whether the colon and equals symbols get parsed correctly depending on the
    /// context: Either in a function header or in a body.
    #[test]
    fn tokenize_symbols_context() {
        test("[func: key=value][Answer: 7]",
             vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
                  T("Answer:"), S, T("7"), R]);
        test("[[n: k=v]:x][:[=]]:=",
             vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
                  L, T(":"), L, E, R, R, T(":=")]);
        test("[hi: k=[func][body] v=1][hello]",
             vec![L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
                  T("v"), E, T("1"), R, L, T("hello"), R]);
        // Underscores inside a function header are plain text, not markup.
        test("[func: __key__=value]",
             vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
        // A bracket inside a block comment must not switch the tokenizer context.
        test("The /*[*/ answer: 7.",
             vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
    }

    /// Test if block and line comments get tokenized as expected.
    #[test]
    fn tokenize_comments() {
        test("These // Line comments.",
             vec![T("These"), S, LC(" Line comments.")]);
        test("This /* is */ a comment.",
             vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
        test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
        // A closing `*/` without a matching opener is its own token.
        test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
        test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
        // Block comments nest: the inner `/* ... */` stays part of the outer comment.
        test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
    }

    /// This test has a special look at the underscore syntax.
    #[test]
    fn tokenize_underscores() {
        test("he_llo_world_ __ Now this_ is_ special!",
             vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
                  T("this"), TU, S, T("is"), TU, S, T("special!")]);
    }

    /// This test is for checking if non-ASCII characters get parsed correctly.
    #[test]
    fn tokenize_unicode() {
        test("[document][Hello 🌍!]",
             vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
        test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
    }
}
|
82
tests/layouting.rs
Normal file
82
tests/layouting.rs
Normal file
@ -0,0 +1,82 @@
|
|||||||
|
use std::fs::{self, File};
|
||||||
|
use std::io::{Write, Read, BufWriter};
|
||||||
|
use std::process::Command;
|
||||||
|
|
||||||
|
use typst::Typesetter;
|
||||||
|
use typst::toddle::query::FileSystemFontProvider;
|
||||||
|
use typst::export::pdf::PdfExporter;
|
||||||
|
use typst::doc::LayoutAction;
|
||||||
|
|
||||||
|
/// Relative path of the directory that receives all artifacts produced by the
/// layouting test (the `serialized`, `rendered` and `pdf` subdirectories).
const CACHE_DIR: &str = "test-cache";
|
||||||
|
|
||||||
|
|
||||||
|
/// Runs `test` once for every file found in `tests/layouts/`, using the file
/// stem as the test name and the file contents as the source code.
#[test]
fn layouting() {
    // All output directories must exist before a test case writes into them.
    for subdir in &["serialized", "rendered", "pdf"] {
        fs::create_dir_all(format!("{}/{}", CACHE_DIR, subdir)).unwrap();
    }

    for entry in fs::read_dir("tests/layouts/").unwrap() {
        let path = entry.unwrap().path();

        let mut src = String::new();
        File::open(&path).unwrap().read_to_string(&mut src).unwrap();

        let name = path.file_stem().and_then(|stem| stem.to_str()).unwrap();
        test(name, &src);
    }
}
|
||||||
|
|
||||||
|
/// Create a _PDF_ with a name from the source code.
|
||||||
|
fn test(name: &str, src: &str) {
|
||||||
|
let mut typesetter = Typesetter::new();
|
||||||
|
let provider = FileSystemFontProvider::from_listing("fonts/fonts.toml").unwrap();
|
||||||
|
typesetter.add_font_provider(provider.clone());
|
||||||
|
|
||||||
|
// Layout into box layout.
|
||||||
|
let tree = typesetter.parse(src).unwrap();
|
||||||
|
let layout = typesetter.layout(&tree).unwrap();
|
||||||
|
|
||||||
|
// Write the serialed layout file.
|
||||||
|
let path = format!("{}/serialized/{}.box", CACHE_DIR, name);
|
||||||
|
let mut file = File::create(path).unwrap();
|
||||||
|
|
||||||
|
// Find all used fonts and their filenames.
|
||||||
|
let mut map = Vec::new();
|
||||||
|
let mut loader = typesetter.loader().borrow_mut();
|
||||||
|
for action in &layout.actions {
|
||||||
|
if let LayoutAction::SetFont(index, _) = action {
|
||||||
|
if map.iter().find(|(i, _)| i == index).is_none() {
|
||||||
|
let (_, provider_index) = loader.get_provider_and_index(*index);
|
||||||
|
let filename = provider.get_path(provider_index).to_str().unwrap();
|
||||||
|
map.push((*index, filename));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(loader);
|
||||||
|
|
||||||
|
// Write the font mapping into the serialization file.
|
||||||
|
writeln!(file, "{}", map.len()).unwrap();
|
||||||
|
for (index, path) in map {
|
||||||
|
writeln!(file, "{} {}", index, path).unwrap();
|
||||||
|
}
|
||||||
|
layout.serialize(&mut file).unwrap();
|
||||||
|
|
||||||
|
// Render the layout into a PNG.
|
||||||
|
Command::new("python")
|
||||||
|
.arg("tests/render.py")
|
||||||
|
.arg(name)
|
||||||
|
.spawn()
|
||||||
|
.expect("failed to run python-based renderer");
|
||||||
|
|
||||||
|
// Write the PDF file.
|
||||||
|
let path = format!("{}/pdf/{}.pdf", CACHE_DIR, name);
|
||||||
|
let file = BufWriter::new(File::create(path).unwrap());
|
||||||
|
let document = layout.into_doc();
|
||||||
|
let exporter = PdfExporter::new();
|
||||||
|
exporter.export(&document, typesetter.loader(), file).unwrap();
|
||||||
|
}
|
88
tests/layouts/shakespeare-right.tps
Normal file
88
tests/layouts/shakespeare-right.tps
Normal file
@ -0,0 +1,88 @@
|
|||||||
|
[align: right][
|
||||||
|
[bold][Scene 5: _The Tower of London_]
|
||||||
|
|
||||||
|
[italic][Enter Mortimer, brought in a chair, and Gaolers.]
|
||||||
|
|
||||||
|
*Mortimer.* Kind keepers of my weak decaying age,
|
||||||
|
Let dying Mortimer here rest himself.
|
||||||
|
Even like a man new haled from the rack,
|
||||||
|
So fare my limbs with long imprisonment;
|
||||||
|
And these grey locks, the pursuivants of death,
|
||||||
|
Nestor-like aged in an age of care,
|
||||||
|
Argue the end of Edmund Mortimer.
|
||||||
|
These eyes, like lamps whose wasting oil is spent,
|
||||||
|
Wax dim, as drawing to their exigent;
|
||||||
|
Weak shoulders, overborne with burdening grief,
|
||||||
|
And pithless arms, like to a withered vine
|
||||||
|
That droops his sapless branches to the ground.
|
||||||
|
Yet are these feet, whose strengthless stay is numb,
|
||||||
|
Unable to support this lump of clay,
|
||||||
|
Swift-winged with desire to get a grave,
|
||||||
|
As witting I no other comfort have.
|
||||||
|
But tell me, keeper, will my nephew come?
|
||||||
|
|
||||||
|
*First Keeper.* Richard Plantagenet, my lord, will come.
|
||||||
|
We sent unto the Temple, unto his chamber;
|
||||||
|
And answer was return'd that he will come.
|
||||||
|
|
||||||
|
*Mortimer.* Enough; my soul shall then be satisfied.
|
||||||
|
Poor gentleman! his wrong doth equal mine.
|
||||||
|
Since Henry Monmouth first began to reign,
|
||||||
|
Before whose glory I was great in arms,
|
||||||
|
This loathsome sequestration have I had;
|
||||||
|
And even since then hath Richard been obscur'd,
|
||||||
|
Depriv'd of honour and inheritance.
|
||||||
|
But now the arbitrator of despairs,
|
||||||
|
Just Death, kind umpire of men's miseries,
|
||||||
|
With sweet enlargement doth dismiss me hence.
|
||||||
|
I would his troubles likewise were expir'd,
|
||||||
|
That so he might recover what was lost.
|
||||||
|
|
||||||
|
|
||||||
|
[italic][Enter Richard Plantagenet]
|
||||||
|
|
||||||
|
*First Keeper.* My lord, your loving nephew now is come.
|
||||||
|
|
||||||
|
*Mortimer.* Richard Plantagenet, my friend, is he come?
|
||||||
|
|
||||||
|
*Plantagenet.* Ay, noble uncle, thus ignobly us'd,
|
||||||
|
Your nephew, late despised Richard, comes.
|
||||||
|
|
||||||
|
*Mortimer.* Direct mine arms I may embrace his neck
|
||||||
|
And in his bosom spend my latter gasp.
|
||||||
|
O, tell me when my lips do touch his cheeks,
|
||||||
|
That I may kindly give one fainting kiss.
|
||||||
|
And now declare, sweet stem from York's great stock,
|
||||||
|
Why didst thou say of late thou wert despis'd?
|
||||||
|
|
||||||
|
*Plantagenet.* First, lean thine aged back against mine arm;
|
||||||
|
And, in that ease, I'll tell thee my disease.
|
||||||
|
This day, in argument upon a case,
|
||||||
|
Some words there grew 'twixt Somerset and me;
|
||||||
|
Among which terms he us'd his lavish tongue
|
||||||
|
And did upbraid me with my father's death;
|
||||||
|
Which obloquy set bars before my tongue,
|
||||||
|
Else with the like I had requited him.
|
||||||
|
Therefore, good uncle, for my father's sake,
|
||||||
|
In honour of a true Plantagenet,
|
||||||
|
And for alliance sake, declare the cause
|
||||||
|
My father, Earl of Cambridge, lost his head.
|
||||||
|
|
||||||
|
*Mortimer.* That cause, fair nephew, that imprison'd me
|
||||||
|
And hath detain'd me all my flow'ring youth
|
||||||
|
Within a loathsome dungeon, there to pine,
|
||||||
|
Was cursed instrument of his decease.
|
||||||
|
|
||||||
|
*Plantagenet.* Discover more at large what cause that was,
|
||||||
|
For I am ignorant and cannot guess.
|
||||||
|
|
||||||
|
*Mortimer.* I will, if that my fading breath permit
|
||||||
|
And death approach not ere my tale be done.
|
||||||
|
Henry the Fourth, grandfather to this king,
|
||||||
|
Depos'd his nephew Richard, Edward's son,
|
||||||
|
The first-begotten and the lawful heir
|
||||||
|
Of Edward king, the third of that descent;
|
||||||
|
During whose reign the Percies of the north,
|
||||||
|
Finding his usurpation most unjust,
|
||||||
|
Endeavour'd my advancement to the throne ...
|
||||||
|
]
|
13
tests/layouts/styles.tps
Normal file
13
tests/layouts/styles.tps
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
_Multiline:_
|
||||||
|
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy
|
||||||
|
eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
|
||||||
|
voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet
|
||||||
|
clita kasd gubergren, no sea takimata sanctus est.
|
||||||
|
|
||||||
|
_Emoji:_ Hello World! 🌍
|
||||||
|
|
||||||
|
_Styles:_ This is made *bold*, that _italic_ and this one `monospace` using the
|
||||||
|
built-in syntax!
|
||||||
|
|
||||||
|
_Styles with functions:_ This [bold][word] is made bold and [italic][that] is italic
|
||||||
|
using the standard library functions [mono][bold] and `italic`!
|
73
tests/render.py
Normal file
73
tests/render.py
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import pathlib
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
|
||||||
|
# Directory containing this script.
BASE = os.path.dirname(__file__)
# Directory holding the test artifacts. The trailing slash is significant:
# `Renderer.export` appends to this path by plain string concatenation.
CACHE_DIR = os.path.join(BASE, "../test-cache/")
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Render the serialized layout `serialized/<name>.box` into a PNG.

    The name is taken from the single command line argument. The file is read
    as follows: the first line holds the number of fonts, followed by one
    "<index> <path>" line per font, one "<width> <height>" line, and then one
    render command per line.
    """
    assert len(sys.argv) == 2, "usage: python render.py <name>"
    name = sys.argv[1]

    filename = os.path.join(CACHE_DIR, f"serialized/{name}.box")
    with open(filename, encoding="utf-8") as file:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would eat a real character whenever the final line
        # is not newline-terminated.
        lines = [line.rstrip("\n") for line in file.readlines()]

    fonts = {}
    font_count = int(lines[0])
    for i in range(font_count):
        # Split only once so that font paths containing spaces stay intact.
        parts = lines[1 + i].split(' ', 1)
        index = int(parts[0])
        path = parts[1]
        fonts[index] = os.path.join(BASE, "../fonts", path)

    width, height = (float(s) for s in lines[font_count + 1].split())

    renderer = Renderer(fonts, width, height)
    for command in lines[font_count + 2:]:
        renderer.execute(command)

    pathlib.Path(os.path.join(CACHE_DIR, "rendered")).mkdir(parents=True, exist_ok=True)
    renderer.export(name)
|
||||||
|
|
||||||
|
|
||||||
|
class Renderer:
    """Executes serialized layout commands on an in-memory image."""

    def __init__(self, fonts, width, height):
        """Create a white canvas for a page of `width` x `height` points.

        `fonts` maps font indices to font file paths.
        """
        self.fonts = fonts
        canvas_size = (pix(width), pix(height))
        self.img = Image.new("RGBA", canvas_size, (255, 255, 255))
        self.draw = ImageDraw.Draw(self.img)
        self.cursor = (0, 0)

    def execute(self, command):
        """Run one command line; its first character selects the operation.

        - `m <x> <y>`: move the cursor to the given position.
        - `f <index> <size>`: load the font with that index at that size.
        - `w <text>`: draw the text in black at the cursor with the current font.

        Any other first character raises an exception.
        """
        opcode = command[0]
        args = command.split()[1:]

        if opcode == 'm':
            x, y = map(lambda value: pix(float(value)), args)
            self.cursor = (x, y)
            return

        if opcode == 'f':
            font_index = int(args[0])
            font_size = pix(float(args[1]))
            self.font = ImageFont.truetype(self.fonts[font_index], font_size)
            return

        if opcode == 'w':
            # Take the text verbatim after the "w " prefix so that its
            # whitespace is kept exactly as written.
            self.draw.text(self.cursor, command[2:], (0, 0, 0), font=self.font)
            return

        raise Exception("invalid command")

    def export(self, name):
        """Save the image as `rendered/<name>.png` below the cache directory."""
        self.img.save(f"{CACHE_DIR}rendered/{name}.png")
|
||||||
|
|
||||||
|
|
||||||
|
def pix(points):
    """Convert a length in points into whole pixels.

    Two pixels are used per point; the result is truncated to an integer.
    """
    doubled = points * 2
    return int(doubled)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the renderer only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
Loading…
x
Reference in New Issue
Block a user