mirror of
https://github.com/typst/typst
synced 2025-05-13 20:46:23 +08:00
Move crate into workspace subfolder
This commit is contained in:
commit
5a600eb354
10
Cargo.toml
Normal file
10
Cargo.toml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[package]
|
||||||
|
name = "typeset"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["Laurenz Mädje <laurmaedje@gmail.com>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
unicode-segmentation = "1.2"
|
||||||
|
unicode-xid = "0.1.0"
|
||||||
|
byteorder = "1"
|
187
src/doc.rs
Normal file
187
src/doc.rs
Normal file
@ -0,0 +1,187 @@
|
|||||||
|
//! Generation of abstract documents from syntax trees.
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
use crate::parsing::{SyntaxTree, Node};
|
||||||
|
use crate::font::{Font, BuiltinFont};
|
||||||
|
|
||||||
|
|
||||||
|
/// Abstract representation of a complete typesetted document.
///
/// This abstract thing can then be serialized into a specific format like PDF.
/// It owns its pages and the list of fonts those pages are set in.
#[derive(Debug, Clone, PartialEq)]
pub struct Document {
    /// The pages of the document.
    pub pages: Vec<Page>,
    /// The fonts used by the document.
    pub fonts: Vec<DocumentFont>,
}
|
||||||
|
|
||||||
|
impl Document {
|
||||||
|
/// Create a new document without content.
|
||||||
|
pub fn new() -> Document {
|
||||||
|
Document {
|
||||||
|
pages: vec![],
|
||||||
|
fonts: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A page of a document.
#[derive(Debug, Clone, PartialEq)]
pub struct Page {
    /// The width and height of the page.
    pub size: [Size; 2],
    /// The contents of the page.
    pub contents: Vec<Text>,
}

/// Plain text.
// A simple newtype wrapper around the owned string content.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Text(pub String);

/// A font (either built-in or external).
#[derive(Debug, Clone, PartialEq)]
pub enum DocumentFont {
    /// One of the 14 built-in fonts.
    Builtin(BuiltinFont),
    /// An externally loaded font.
    Loaded(Font),
}
|
||||||
|
|
||||||
|
/// A distance that can be created from different units of length.
#[derive(Debug, Copy, Clone, PartialEq)]
pub struct Size {
    /// The size in typographic points (1/72 inches).
    pub points: f32,
}

impl Size {
    /// Create a size from a number of points.
    pub fn from_points(points: f32) -> Size {
        Size { points }
    }

    /// Create a size from a number of inches.
    pub fn from_inches(inches: f32) -> Size {
        // One inch is defined as exactly 72 typographic points.
        // (Was `inches / 72.0`, which inverted the conversion.)
        Size { points: 72.0 * inches }
    }

    /// Create a size from a number of millimeters.
    pub fn from_mm(mm: f32) -> Size {
        // 1 mm = 72 / 25.4 ≈ 2.8346 pt.
        Size { points: 2.8345 * mm }
    }

    /// Create a size from a number of centimeters.
    pub fn from_cm(cm: f32) -> Size {
        // 1 cm = 10 mm ≈ 28.345 pt. (The old factor 0.028345 was a
        // factor of 1000 too small.)
        Size { points: 28.345 * cm }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// A type that can be generated into a document.
pub trait Generate {
    /// Generate a document from self.
    fn generate(self) -> GenResult<Document>;
}

impl Generate for SyntaxTree<'_> {
    /// Generation is delegated to a fresh `Generator` that consumes the tree.
    fn generate(self) -> GenResult<Document> {
        Generator::new(self).generate()
    }
}
|
||||||
|
|
||||||
|
/// Result type used for generation.
type GenResult<T> = std::result::Result<T, GenerationError>;

/// A failure when generating.
// Carries only a human-readable message; no source positions yet.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct GenerationError {
    /// A message describing the error.
    pub message: String,
}

impl fmt::Display for GenerationError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "generation error: {}", self.message)
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Transforms an abstract syntax tree into a document.
#[derive(Debug, Clone)]
struct Generator<'s> {
    // The tree that is consumed during generation.
    tree: SyntaxTree<'s>,
}

impl<'s> Generator<'s> {
    /// Create a new generator from a syntax tree.
    fn new(tree: SyntaxTree<'s>) -> Generator<'s> {
        Generator { tree }
    }

    /// Generate the abstract document.
    ///
    /// Currently this flattens all words into one text run on a single
    /// A4-sized page set in Helvetica. Styling toggles and function nodes
    /// are not implemented yet and panic via `unimplemented!`.
    fn generate(&mut self) -> GenResult<Document> {
        let fonts = vec![DocumentFont::Builtin(BuiltinFont::Helvetica)];

        let mut text = String::new();
        for node in &self.tree.nodes {
            match node {
                // Spaces are dropped until the first word has been emitted
                // (first arm only fires on non-empty text); newlines are
                // always dropped.
                Node::Space if !text.is_empty() => text.push(' '),
                Node::Space | Node::Newline => (),
                Node::Word(word) => text.push_str(word),

                Node::ToggleItalics | Node::ToggleBold | Node::ToggleMath => unimplemented!(),
                Node::Func(_) => unimplemented!(),
            }
        }

        // A4 paper: 210 mm × 297 mm.
        let page = Page {
            size: [Size::from_mm(210.0), Size::from_mm(297.0)],
            contents: vec![ Text(text) ],
        };

        Ok(Document {
            pages: vec![page],
            fonts,
        })
    }

    /// Gives a generation error with a message.
    // Not called by `generate` yet; kept for upcoming error paths.
    #[inline]
    fn err<R, S: Into<String>>(&self, message: S) -> GenResult<R> {
        Err(GenerationError { message: message.into() })
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(test)]
mod generator_tests {
    use super::*;
    use crate::parsing::{Tokenize, Parse};

    /// Test if the source gets generated into the document.
    fn test(src: &str, doc: Document) {
        assert_eq!(src.tokenize().parse().unwrap().generate(), Ok(doc));
    }

    /// Test if generation gives this error for the source code.
    // NOTE(review): currently unused — there are no error-case tests yet.
    fn test_err(src: &str, err: GenerationError) {
        assert_eq!(src.tokenize().parse().unwrap().generate(), Err(err));
    }

    #[test]
    fn generator_simple() {
        // A plain sentence becomes one A4 page with a single text run
        // set in the default Helvetica font.
        test("This is an example of a sentence.", Document {
            pages: vec![
                Page {
                    size: [Size::from_mm(210.0), Size::from_mm(297.0)],
                    contents: vec![
                        Text("This is an example of a sentence.".to_owned()),
                    ]
                }
            ],
            fonts: vec![DocumentFont::Builtin(BuiltinFont::Helvetica)],
        });
    }
}
|
270
src/font.rs
Normal file
270
src/font.rs
Normal file
@ -0,0 +1,270 @@
|
|||||||
|
//! Reading of metrics and font data from _OpenType_ and _TrueType_ font files.
|
||||||
|
|
||||||
|
#![allow(unused_variables)]
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::{self, Read, Seek, SeekFrom};
|
||||||
|
use byteorder::{BE, ReadBytesExt};
|
||||||
|
|
||||||
|
|
||||||
|
/// A loaded opentype (or truetype) font.
#[derive(Debug, Clone, PartialEq)]
pub struct Font {
    /// The PostScript name of this font.
    pub name: String,
}

impl Font {
    /// Create a new font from a byte source.
    ///
    /// The source must be an _OpenType_ or _TrueType_ font file. Only the
    /// PostScript name is extracted at the moment.
    pub fn new<R>(data: &mut R) -> FontResult<Font> where R: Read + Seek {
        OpenTypeReader::new(data).read()
    }
}
|
||||||
|
|
||||||
|
/// Built-in fonts.
///
/// These correspond to the 14 standard fonts that every PDF consumer
/// provides without embedding.
#[derive(Debug, Copy, Clone, PartialEq)]
#[allow(missing_docs)]
pub enum BuiltinFont {
    Courier,
    CourierBold,
    CourierOblique,
    CourierBoldOblique,
    Helvetica,
    HelveticaBold,
    HelveticaOblique,
    HelveticaBoldOblique,
    TimesRoman,
    TimesBold,
    // NOTE: These two variant names are misspelled (should be
    // `TimesItalic` / `TimesBoldItalic`), but renaming them would break
    // existing callers, so they are kept as-is.
    TimeItalic,
    TimeBoldItalic,
    Symbol,
    ZapfDingbats,
}

impl BuiltinFont {
    /// The PostScript name of the font.
    pub fn name(&self) -> &'static str {
        use BuiltinFont::*;
        match self {
            Courier => "Courier",
            CourierBold => "Courier-Bold",
            CourierOblique => "Courier-Oblique",
            CourierBoldOblique => "Courier-BoldOblique",
            Helvetica => "Helvetica",
            HelveticaBold => "Helvetica-Bold",
            HelveticaOblique => "Helvetica-Oblique",
            HelveticaBoldOblique => "Helvetica-BoldOblique",
            TimesRoman => "Times-Roman",
            TimesBold => "Times-Bold",
            // Fixed: the standard PostScript names are "Times-Italic" and
            // "Times-BoldItalic" (previously "Time-…", which no PDF reader
            // would recognize).
            TimeItalic => "Times-Italic",
            TimeBoldItalic => "Times-BoldItalic",
            Symbol => "Symbol",
            ZapfDingbats => "ZapfDingbats",
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Result type used for font loading.
type FontResult<T> = std::result::Result<T, LoadingError>;

/// A failure when loading a font.
// Carries only a human-readable message; no byte offsets yet.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct LoadingError {
    /// A message describing the error.
    pub message: String,
}

impl From<io::Error> for LoadingError {
    /// I/O failures are wrapped with their display text as the message,
    /// which lets reader code use `?` on `io::Result` values.
    fn from(err: io::Error) -> LoadingError {
        LoadingError { message: format!("io error: {}", err) }
    }
}

impl fmt::Display for LoadingError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "font loading error: {}", self.message)
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Reads a font from a _OpenType_ or _TrueType_ font file.
struct OpenTypeReader<'r, R> where R: Read + Seek {
    // The byte source the font is parsed from.
    data: &'r mut R,
    // The font being built up while reading.
    font: Font,
    // Directory entries describing where each table lives in `data`.
    table_records: Vec<TableRecord>,
}
|
||||||
|
|
||||||
|
/// Used to identify a table, design-variation axis, script,
/// language system, feature, or baseline.
#[derive(Clone, PartialEq)]
struct Tag(pub [u8; 4]);

impl PartialEq<&str> for Tag {
    /// A tag equals a string when the string's bytes match all four
    /// tag bytes exactly.
    fn eq(&self, other: &&str) -> bool {
        &self.0 == other.as_bytes()
    }
}

impl fmt::Debug for Tag {
    /// Debug output is the display form wrapped in quotes, e.g. `"name"`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "\"{}\"", self)
    }
}

impl fmt::Display for Tag {
    /// Renders each of the four bytes as an ASCII character.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for &byte in self.0.iter() {
            write!(f, "{}", byte as char)?;
        }
        Ok(())
    }
}
|
||||||
|
|
||||||
|
/// Stores information about one table.
#[derive(Debug, Clone, PartialEq)]
struct TableRecord {
    // The tag identifying the table (e.g. "name").
    table: Tag,
    // Checksum of the table data (currently stored but never verified).
    check_sum: u32,
    // Byte offset of the table from the beginning of the file.
    offset: u32,
    // Length of the table in bytes.
    length: u32,
}
|
||||||
|
|
||||||
|
impl<'r, R> OpenTypeReader<'r, R> where R: Read + Seek {
|
||||||
|
/// Create a new reader from a byte source.
|
||||||
|
pub fn new(data: &'r mut R) -> OpenTypeReader<'r, R> {
|
||||||
|
OpenTypeReader {
|
||||||
|
data,
|
||||||
|
font: Font {
|
||||||
|
name: String::new(),
|
||||||
|
},
|
||||||
|
table_records: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the font from the byte source.
|
||||||
|
pub fn read(mut self) -> FontResult<Font> {
|
||||||
|
self.read_table_records()?;
|
||||||
|
self.read_name_table()?;
|
||||||
|
|
||||||
|
Ok(self.font)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the offset table.
|
||||||
|
fn read_table_records(&mut self) -> FontResult<()> {
|
||||||
|
let sfnt_version = self.data.read_u32::<BE>()?;
|
||||||
|
let num_tables = self.data.read_u16::<BE>()?;
|
||||||
|
let search_range = self.data.read_u16::<BE>()?;
|
||||||
|
let entry_selector = self.data.read_u16::<BE>()?;
|
||||||
|
let range_shift = self.data.read_u16::<BE>()?;
|
||||||
|
|
||||||
|
let outlines = match sfnt_version {
|
||||||
|
0x00010000 => "truetype",
|
||||||
|
0x4F54544F => "cff",
|
||||||
|
_ => return self.err("unsuported font outlines"),
|
||||||
|
};
|
||||||
|
|
||||||
|
for _ in 0 .. num_tables {
|
||||||
|
let table = self.read_tag()?;
|
||||||
|
let check_sum = self.data.read_u32::<BE>()?;
|
||||||
|
let offset = self.data.read_u32::<BE>()?;
|
||||||
|
let length = self.data.read_u32::<BE>()?;
|
||||||
|
|
||||||
|
self.table_records.push(TableRecord {
|
||||||
|
table,
|
||||||
|
check_sum,
|
||||||
|
offset,
|
||||||
|
length,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read the name table (gives general information about the font).
|
||||||
|
fn read_name_table(&mut self) -> FontResult<()> {
|
||||||
|
let table = match self.table_records.iter().find(|record| record.table == "name") {
|
||||||
|
Some(table) => table,
|
||||||
|
None => return self.err("missing 'name' table"),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.data.seek(SeekFrom::Start(table.offset as u64))?;
|
||||||
|
|
||||||
|
let format = self.data.read_u16::<BE>()?;
|
||||||
|
let count = self.data.read_u16::<BE>()?;
|
||||||
|
let string_offset = self.data.read_u16::<BE>()?;
|
||||||
|
|
||||||
|
let storage = (table.offset + string_offset as u32) as u64;
|
||||||
|
|
||||||
|
let mut name = None;
|
||||||
|
|
||||||
|
for _ in 0 .. count {
|
||||||
|
let platform_id = self.data.read_u16::<BE>()?;
|
||||||
|
let encoding_id = self.data.read_u16::<BE>()?;
|
||||||
|
let language_id = self.data.read_u16::<BE>()?;
|
||||||
|
let name_id = self.data.read_u16::<BE>()?;
|
||||||
|
let length = self.data.read_u16::<BE>()?;
|
||||||
|
let offset = self.data.read_u16::<BE>()?;
|
||||||
|
|
||||||
|
// Postscript name is what we are interested in
|
||||||
|
if name_id == 6 && platform_id == 3 && encoding_id == 1 {
|
||||||
|
if length % 2 != 0 {
|
||||||
|
return self.err("invalid encoded name");
|
||||||
|
}
|
||||||
|
|
||||||
|
self.data.seek(SeekFrom::Start(storage + offset as u64))?;
|
||||||
|
let mut buffer = Vec::with_capacity(length as usize / 2);
|
||||||
|
|
||||||
|
for _ in 0 .. length / 2 {
|
||||||
|
buffer.push(self.data.read_u16::<BE>()?);
|
||||||
|
}
|
||||||
|
|
||||||
|
name = match String::from_utf16(&buffer) {
|
||||||
|
Ok(string) => Some(string),
|
||||||
|
Err(_) => return self.err("invalid encoded name"),
|
||||||
|
};
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.font.name = match name {
|
||||||
|
Some(name) => name,
|
||||||
|
None => return self.err("missing postscript font name"),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a tag (array of four u8's).
|
||||||
|
fn read_tag(&mut self) -> FontResult<Tag> {
|
||||||
|
let mut tag = [0u8; 4];
|
||||||
|
self.data.read(&mut tag)?;
|
||||||
|
Ok(Tag(tag))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gives a font loading error with a message.
|
||||||
|
fn err<T, S: Into<String>>(&self, message: S) -> FontResult<T> {
|
||||||
|
Err(LoadingError { message: message.into() })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[cfg(test)]
mod font_tests {
    use super::*;

    /// Test if the loaded font is the same as the expected font.
    // NOTE(review): depends on font files existing at the given relative
    // paths — confirm they resolve against the test working directory.
    fn test(path: &str, font: Font) {
        let mut file = std::fs::File::open(path).unwrap();
        assert_eq!(Font::new(&mut file), Ok(font));
    }

    #[test]
    fn opentype() {
        test("../fonts/NotoSerif-Regular.ttf", Font {
            name: "NotoSerif".to_owned(),
        });
        test("../fonts/NotoSansMath-Regular.ttf", Font {
            name: "NotoSansMath-Regular".to_owned(),
        });
    }
}
|
11
src/lib.rs
Normal file
11
src/lib.rs
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
//! Typeset is a library for compiling _plain-text_ strings written in the
|
||||||
|
//! corresponding typesetting language into a typesetted document in a
|
||||||
|
//! file format like _PDF_.
|
||||||
|
|
||||||
|
#![allow(unused)]
|
||||||
|
|
||||||
|
pub mod parsing;
|
||||||
|
pub mod doc;
|
||||||
|
pub mod font;
|
||||||
|
pub mod pdf;
|
||||||
|
pub mod utility;
|
696
src/parsing.rs
Normal file
696
src/parsing.rs
Normal file
@ -0,0 +1,696 @@
|
|||||||
|
//! Parsing of source code into tokens and syntax trees.
|
||||||
|
|
||||||
|
use std::fmt;
|
||||||
|
use std::iter::Peekable;
|
||||||
|
use std::mem::swap;
|
||||||
|
use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
|
||||||
|
use crate::utility::{Splinor, Spline, Splined, StrExt};
|
||||||
|
|
||||||
|
|
||||||
|
/// A logical unit of the incoming text stream.
///
/// The lifetime `'s` ties borrowed `Word` slices to the tokenized source.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Token<'s> {
    /// One or more whitespace (non-newline) codepoints.
    Space,
    /// A line feed (either `\n` or `\r\n`).
    Newline,
    /// A left bracket: `[`.
    LeftBracket,
    /// A right bracket: `]`.
    RightBracket,
    /// A colon (`:`) indicating the beginning of function arguments.
    ///
    /// If a colon occurs outside of the function header, it will be
    /// tokenized as a `Word`.
    Colon,
    /// Same as with `Colon`.
    Equals,
    /// Two underscores, indicating text in _italics_.
    DoubleUnderscore,
    /// Two stars, indicating **bold** text.
    DoubleStar,
    /// A dollar sign, indicating mathematical content.
    Dollar,
    /// A hashtag starting a comment.
    Hashtag,
    /// Everything else just is a literal word.
    Word(&'s str),
}
|
||||||
|
|
||||||
|
|
||||||
|
/// A type that is seperable into logical units (tokens).
pub trait Tokenize {
    /// Tokenize self into logical units.
    fn tokenize<'s>(&'s self) -> Tokens<'s>;
}

impl Tokenize for str {
    /// The returned iterator borrows the string for its whole lifetime.
    fn tokenize<'s>(&'s self) -> Tokens<'s> {
        Tokens::new(self)
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// An iterator over the tokens of a text.
#[derive(Clone)]
pub struct Tokens<'s> {
    // The full source text (iteration itself goes through `words`).
    source: &'s str,
    // Unicode word-boundary segments of the source, with one-item lookahead.
    words: Peekable<UWordBounds<'s>>,
    // The current tokenizer state.
    state: TokensState<'s>,
    // Previously active states to return to (see `switch`/`unswitch`).
    stack: Vec<TokensState<'s>>,
}

impl fmt::Debug for Tokens<'_> {
    // Manual impl — likely because `UWordBounds` lacks `Debug`; the word
    // iterator is rendered as a placeholder string instead.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("Tokens")
            .field("source", &self.source)
            .field("words", &"Peekable<UWordBounds>")
            .field("state", &self.state)
            .field("stack", &self.stack)
            .finish()
    }
}
|
||||||
|
|
||||||
|
/// The state the tokenizer is in.
#[derive(Debug, Clone)]
enum TokensState<'s> {
    /// The base state if there is nothing special we are in.
    Body,
    /// Inside a function header. Here colons and equal signs get parsed
    /// as distinct tokens rather than text.
    Function,
    /// We expect either the end of the function or the beginning of the body.
    MaybeBody,
    /// We are inside one unicode word that consists of multiple tokens,
    /// because it contains double underscores.
    DoubleUnderscore(Spline<'s, Token<'s>>),
}

impl PartialEq for TokensState<'_> {
    // Manual impl: `Spline` values are never compared, so any two
    // `DoubleUnderscore` states compare as unequal.
    fn eq(&self, other: &TokensState) -> bool {
        use TokensState as TS;

        match (self, other) {
            (TS::Body, TS::Body) => true,
            (TS::Function, TS::Function) => true,
            (TS::MaybeBody, TS::MaybeBody) => true,
            // They are not necessarily different, but we don't care
            _ => false,
        }
    }
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for Tokens<'s> {
    type Item = Token<'s>;

    /// Advance the iterator, return the next token or nothing.
    fn next(&mut self) -> Option<Token<'s>> {
        use TokensState as TS;

        // Return the remaining words and double underscores.
        if let TS::DoubleUnderscore(ref mut splinor) = self.state {
            loop {
                if let Some(splined) = splinor.next() {
                    return Some(match splined {
                        // Empty value segments (between two directly
                        // adjacent `__`) are skipped via `continue`.
                        Splined::Value(word) if word != "" => Token::Word(word),
                        Splined::Splinor(s) => s,
                        _ => continue,
                    });
                } else {
                    // The spline is drained; fall back to the stacked state.
                    self.unswitch();
                    break;
                }
            }
        }

        // Skip whitespace, but if at least one whitespace word existed,
        // remember that, because we return a space token.
        // NOTE(review): `is_whitespace` comes from `utility::StrExt` —
        // presumably it excludes newlines (otherwise the newline arms
        // below would never fire); confirm in `utility`.
        let mut whitespace = false;
        while let Some(word) = self.words.peek() {
            if !word.is_whitespace() {
                break;
            }
            whitespace = true;
            self.advance();
        }
        if whitespace {
            return Some(Token::Space);
        }

        // Function maybe has a body
        if self.state == TS::MaybeBody {
            match *self.words.peek()? {
                "[" => {
                    self.state = TS::Body;
                    return Some(self.consumed(Token::LeftBracket));
                },
                // No body follows: drop back to the surrounding state and
                // tokenize the word normally below.
                _ => self.unswitch(),
            }
        }

        // Now all special cases are handled and we can finally look at the
        // next words.
        let next = self.words.next()?;
        let afterwards = self.words.peek();

        Some(match next {
            // Special characters
            "[" => {
                self.switch(TS::Function);
                Token::LeftBracket
            },
            "]" => {
                if self.state == TS::Function {
                    self.state = TS::MaybeBody;
                }
                Token::RightBracket
            },
            "$" => Token::Dollar,
            "#" => Token::Hashtag,

            // Context sensitive operators
            ":" if self.state == TS::Function => Token::Colon,
            "=" if self.state == TS::Function => Token::Equals,

            // Double star/underscore
            // Stars arrive as two separate words, so the second one is
            // consumed here; `__` is one unicode word already.
            "*" if afterwards == Some(&"*") => {
                self.consumed(Token::DoubleStar)
            },
            "__" => Token::DoubleUnderscore,

            // Newlines
            "\n" | "\r\n" => Token::Newline,

            // Escaping
            r"\" => {
                if let Some(next) = afterwards {
                    let escapable = match *next {
                        "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
                        // A word starting with `__` can be escaped too: the
                        // backslash neutralizes the double underscore.
                        w if w.starts_with("__") => true,
                        _ => false,
                    };

                    if escapable {
                        let next = *next;
                        self.advance();
                        return Some(Token::Word(next));
                    }
                }

                // Not an escape sequence: emit the backslash verbatim.
                Token::Word(r"\")
            },

            // Double underscores hidden in words.
            word if word.contains("__") => {
                // Split the word at `__` occurrences and recurse into the
                // `DoubleUnderscore` state to drain the pieces one by one.
                let spline = word.spline("__", Token::DoubleUnderscore);
                self.switch(TS::DoubleUnderscore(spline));
                return self.next();
            },

            // Now it seems like it's just a normal word.
            word => Token::Word(word),
        })
    }
}
|
||||||
|
|
||||||
|
impl<'s> Tokens<'s> {
|
||||||
|
/// Create a new token stream from text.
|
||||||
|
#[inline]
|
||||||
|
pub fn new(source: &'s str) -> Tokens<'s> {
|
||||||
|
Tokens {
|
||||||
|
source,
|
||||||
|
words: source.split_word_bounds().peekable(),
|
||||||
|
state: TokensState::Body,
|
||||||
|
stack: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advance the iterator by one step.
|
||||||
|
#[inline]
|
||||||
|
fn advance(&mut self) {
|
||||||
|
self.words.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Switch to the given state.
|
||||||
|
#[inline]
|
||||||
|
fn switch(&mut self, mut state: TokensState<'s>) {
|
||||||
|
swap(&mut state, &mut self.state);
|
||||||
|
self.stack.push(state);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Go back to the top-of-stack state.
|
||||||
|
#[inline]
|
||||||
|
fn unswitch(&mut self) {
|
||||||
|
self.state = self.stack.pop().unwrap_or(TokensState::Body);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advance and return the given token.
|
||||||
|
#[inline]
|
||||||
|
fn consumed(&mut self, token: Token<'s>) -> Token<'s> {
|
||||||
|
self.advance();
|
||||||
|
token
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// A tree representation of the source.
///
/// Holds the flat list of top-level nodes; nesting happens through
/// `Node::Func` bodies.
#[derive(Debug, Clone, PartialEq)]
pub struct SyntaxTree<'s> {
    /// The children.
    pub nodes: Vec<Node<'s>>,
}

impl<'s> SyntaxTree<'s> {
    /// Create an empty syntax tree.
    pub fn new() -> SyntaxTree<'s> {
        SyntaxTree { nodes: vec![] }
    }
}
|
||||||
|
|
||||||
|
/// A node in the abstract syntax tree.
///
/// `Word` nodes borrow from the original source (lifetime `'s`).
#[derive(Debug, Clone, PartialEq)]
pub enum Node<'s> {
    /// Whitespace between other nodes.
    Space,
    /// A line feed.
    Newline,
    /// Indicates that italics were enabled/disabled.
    ToggleItalics,
    /// Indicates that boldface was enabled/disabled.
    ToggleBold,
    /// Indicates that math mode was enabled/disabled.
    ToggleMath,
    /// A literal word.
    Word(&'s str),
    /// A function invocation.
    Func(Function<'s>),
}

/// A node representing a function invocation.
#[derive(Debug, Clone, PartialEq)]
pub struct Function<'s> {
    /// The name of the function.
    pub name: &'s str,
    /// Some syntax tree if the function had a body (second set of brackets),
    /// otherwise nothing.
    pub body: Option<SyntaxTree<'s>>,
}
|
||||||
|
|
||||||
|
|
||||||
|
/// A type that is parseable into a syntax tree.
pub trait Parse<'s> {
    /// Parse self into a syntax tree.
    fn parse(self) -> ParseResult<SyntaxTree<'s>>;
}

impl<'s> Parse<'s> for Tokens<'s> {
    /// A token stream is fed directly into a fresh parser.
    fn parse(self) -> ParseResult<SyntaxTree<'s>> {
        Parser::new(self).parse()
    }
}

impl<'s> Parse<'s> for Vec<Token<'s>> {
    /// An owned token vector is parsed by iterating it by value.
    fn parse(self) -> ParseResult<SyntaxTree<'s>> {
        Parser::new(self.into_iter()).parse()
    }
}
|
||||||
|
|
||||||
|
/// Result type used for parsing.
type ParseResult<T> = std::result::Result<T, ParseError>;

/// A failure when parsing.
// Carries only a human-readable message; no source positions yet.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ParseError {
    /// A message describing the error.
    pub message: String,
}

impl fmt::Display for ParseError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "parse error: {}", self.message)
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Parses a token stream into an abstract syntax tree.
#[derive(Debug, Clone)]
struct Parser<'s, T> where T: Iterator<Item = Token<'s>> {
    // The token source, peekable for one-token lookahead.
    tokens: Peekable<T>,
    // Whether we are in the body or inside a function header.
    state: ParserState,
    // Functions whose bodies are still open; nodes append to the innermost.
    stack: Vec<Function<'s>>,
    // The finished top-level tree.
    tree: SyntaxTree<'s>,
}

/// The state the parser is in.
#[derive(Debug, Clone, PartialEq)]
enum ParserState {
    /// The base state of the parser.
    Body,
    /// Inside a function header.
    Function,
}
|
||||||
|
|
||||||
|
impl<'s, T> Parser<'s, T> where T: Iterator<Item = Token<'s>> {
|
||||||
|
/// Create a new parser from a type that emits results of tokens.
|
||||||
|
fn new(tokens: T) -> Parser<'s, T> {
|
||||||
|
Parser {
|
||||||
|
tokens: tokens.peekable(),
|
||||||
|
state: ParserState::Body,
|
||||||
|
stack: vec![],
|
||||||
|
tree: SyntaxTree::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse into an abstract syntax tree.
|
||||||
|
fn parse(mut self) -> ParseResult<SyntaxTree<'s>> {
|
||||||
|
use ParserState as PS;
|
||||||
|
|
||||||
|
while let Some(token) = self.tokens.next() {
|
||||||
|
// Comment
|
||||||
|
if token == Token::Hashtag {
|
||||||
|
self.skip_while(|t| *t != Token::Newline);
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
match self.state {
|
||||||
|
PS::Body => match token {
|
||||||
|
// Whitespace
|
||||||
|
Token::Space => self.append(Node::Space),
|
||||||
|
Token::Newline => self.append(Node::Newline),
|
||||||
|
|
||||||
|
// Words
|
||||||
|
Token::Word(word) => self.append(Node::Word(word)),
|
||||||
|
|
||||||
|
// Functions
|
||||||
|
Token::LeftBracket => self.switch(PS::Function),
|
||||||
|
Token::RightBracket => {
|
||||||
|
match self.stack.pop() {
|
||||||
|
Some(func) => self.append(Node::Func(func)),
|
||||||
|
None => return self.err("unexpected closing bracket"),
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
// Modifiers
|
||||||
|
Token::DoubleUnderscore => self.append(Node::ToggleItalics),
|
||||||
|
Token::DoubleStar => self.append(Node::ToggleBold),
|
||||||
|
Token::Dollar => self.append(Node::ToggleMath),
|
||||||
|
|
||||||
|
// Should not happen
|
||||||
|
Token::Colon | Token::Equals | Token::Hashtag => unreachable!(),
|
||||||
|
},
|
||||||
|
|
||||||
|
PS::Function => {
|
||||||
|
let name = match token {
|
||||||
|
Token::Word(word) if word.is_identifier() => word,
|
||||||
|
_ => return self.err("expected identifier"),
|
||||||
|
};
|
||||||
|
|
||||||
|
if self.tokens.next() != Some(Token::RightBracket) {
|
||||||
|
return self.err("expected closing bracket");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut func = Function {
|
||||||
|
name,
|
||||||
|
body: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// This function has a body.
|
||||||
|
if let Some(Token::LeftBracket) = self.tokens.peek() {
|
||||||
|
self.advance();
|
||||||
|
func.body = Some(SyntaxTree::new());
|
||||||
|
self.stack.push(func);
|
||||||
|
} else {
|
||||||
|
self.append(Node::Func(func));
|
||||||
|
}
|
||||||
|
|
||||||
|
self.switch(PS::Body);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !self.stack.is_empty() {
|
||||||
|
return self.err("expected closing bracket");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.tree)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Advance the iterator by one step.
|
||||||
|
#[inline]
|
||||||
|
fn advance(&mut self) {
|
||||||
|
self.tokens.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Skip tokens until the condition is met.
|
||||||
|
#[inline]
|
||||||
|
fn skip_while<F>(&mut self, f: F) where F: Fn(&Token) -> bool {
|
||||||
|
while let Some(token) = self.tokens.peek() {
|
||||||
|
if !f(token) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
self.advance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Switch the state.
|
||||||
|
#[inline]
|
||||||
|
fn switch(&mut self, state: ParserState) {
|
||||||
|
self.state = state;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Append a node to the top-of-stack function or the main tree itself.
|
||||||
|
#[inline]
|
||||||
|
fn append(&mut self, node: Node<'s>) {
|
||||||
|
let tree = match self.stack.last_mut() {
|
||||||
|
Some(func) => func.body.get_or_insert_with(|| SyntaxTree::new()),
|
||||||
|
None => &mut self.tree,
|
||||||
|
};
|
||||||
|
|
||||||
|
tree.nodes.push(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gives a parsing error with a message.
|
||||||
|
#[inline]
|
||||||
|
fn err<R, S: Into<String>>(&self, message: S) -> ParseResult<R> {
|
||||||
|
Err(ParseError { message: message.into() })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Unit tests for the tokenizer. Each test feeds a source string through
// `tokenize()` and compares the produced token stream against an explicit
// expectation. Single-letter aliases keep the expected vectors readable.
#[cfg(test)]
mod token_tests {
    use super::*;
    use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
        Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
        Dollar as D, Hashtag as H, Word as W};

    /// Test if the source code tokenizes to the tokens.
    fn test(src: &str, tokens: Vec<Token>) {
        assert_eq!(src.tokenize().collect::<Vec<_>>(), tokens);
    }

    /// Tokenizes the basic building blocks.
    #[test]
    fn tokenize_base() {
        test("", vec![]);
        test("Hallo", vec![W("Hallo")]);
        test("[", vec![L]);
        test("]", vec![R]);
        test("$", vec![D]);
        test("#", vec![H]);
        test("**", vec![DS]);
        test("__", vec![DU]);
        test("\n", vec![N]);
    }

    /// Tests if escaping with backslash works as it should.
    ///
    /// An escaped special character becomes an ordinary one-character word;
    /// an escaped double symbol (`\**`, `\__`) suppresses the double token.
    #[test]
    fn tokenize_escape() {
        test(r"\[", vec![W("[")]);
        test(r"\]", vec![W("]")]);
        test(r"\#", vec![W("#")]);
        test(r"\$", vec![W("$")]);
        test(r"\:", vec![W(":")]);
        test(r"\=", vec![W("=")]);
        test(r"\**", vec![W("*"), W("*")]);
        test(r"\*", vec![W("*")]);
        test(r"\__", vec![W("__")]);
        test(r"\_", vec![W("_")]);
        test(r"\hello", vec![W(r"\"), W("hello")]);
    }

    /// Tokenizes some more realistic examples.
    ///
    /// NOTE(review): the expected `S` tokens imply that runs of indentation
    /// collapse into a single space token — the exact indentation of these
    /// raw strings was reconstructed from a mangled diff; confirm it matches
    /// the original if these expectations ever fail.
    #[test]
    fn tokenize_examples() {
        test(r"
            [function][
                Test [italic][example]!
            ]
        ", vec![
            N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
            W("example"), R, W("!"), N, S, R, N, S
        ]);

        test(r"
            [page: size=A4]
            [font: size=12pt]

            Das ist ein Beispielsatz mit **fetter** Schrift.
        ", vec![
            N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
            L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
            W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
            DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
        ]);
    }

    /// This test checks whether the colon and equals symbols get parsed correctly
    /// depending on the context: Either in a function header or in a body.
    #[test]
    fn tokenize_symbols_context() {
        test("[func: key=value][Answer: 7]",
            vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
                W("Answer"), W(":"), S, W("7"), R]);
        test("[[n: k=v]:x][:[=]]:=",
            vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
                L, W(":"), L, E, R, R, W(":"), W("=")]);
        test("[func: __key__=value]",
            vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
    }

    /// This test has a special look at the double underscore syntax, because
    /// per Unicode standard they are not seperate words and thus harder to parse
    /// than the stars.
    #[test]
    fn tokenize_double_underscore() {
        test("he__llo__world_ _ __ Now this_ is__ special!",
            vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
                W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
    }

    /// This test is for checking if non-ASCII characters get parsed correctly.
    #[test]
    fn tokenize_unicode() {
        test("[document][Hello 🌍!]",
            vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
        test("[f]⺐.", vec![L, W("f"), R, W("⺐"), W(".")]);
    }

    /// This test looks if LF- and CRLF-style newlines get both identified correctly.
    #[test]
    fn tokenize_whitespace_newlines() {
        test(" \t", vec![S]);
        test("First line\r\nSecond line\nThird line\n",
            vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
                W("Third"), S, W("line"), N]);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// Unit tests for the parser. Source strings are tokenized and parsed, and
// the resulting syntax tree (or error) is compared against an expectation
// built with the `tree!` macro below.
#[cfg(test)]
mod parse_tests {
    use super::*;
    use Node::{Space as S, Newline as N, Word as W, Func as F};

    /// Test if the source code parses into the syntax tree.
    fn test(src: &str, tree: SyntaxTree) {
        assert_eq!(src.tokenize().parse(), Ok(tree));
    }

    /// Test if the source parses into the error.
    fn test_err(src: &str, err: ParseError) {
        assert_eq!(src.tokenize().parse(), Err(err));
    }

    /// Short cut macro to create a syntax tree.
    /// Is `vec`-like and the elements are the nodes.
    macro_rules! tree {
        ($($x:expr),*) => (
            SyntaxTree { nodes: vec![$($x),*] }
        );
        // Trailing-comma arm delegates to the main arm.
        ($($x:expr,)*) => (tree![$($x),*])
    }

    /// Parse the basic cases.
    #[test]
    fn parse_base() {
        test("", tree! {});
        test("Hello World!", tree! { W("Hello"), S, W("World"), W("!")});
    }

    /// Parse things dealing with functions.
    #[test]
    fn parse_functions() {
        // A function without a body parses to `body: None`.
        test("[test]", tree! { F(Function { name: "test", body: None }) });
        test("This is an [modifier][example] of a function invocation.", tree! {
            W("This"), S, W("is"), S, W("an"), S,
            F(Function { name: "modifier", body: Some(tree! { W("example") }) }), S,
            W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
        });
        test("[func][Hello][links][Here][end]", tree! {
            F(Function {
                name: "func",
                body: Some(tree! { W("Hello") }),
            }),
            F(Function {
                name: "links",
                body: Some(tree! { W("Here") }),
            }),
            F(Function {
                name: "end",
                body: None,
            }),
        });
        // An explicit empty body is `Some(empty tree)`, not `None`.
        test("[bodyempty][]", tree! {
            F(Function {
                name: "bodyempty",
                body: Some(tree! {})
            })
        });
        test("[nested][[func][call]] outside", tree! {
            F(Function {
                name: "nested",
                body: Some(tree! { F(Function {
                    name: "func",
                    body: Some(tree! { W("call") }),
                }), }),
            }),
            S, W("outside")
        });
    }

    /// Tests if the parser handles non-ASCII stuff correctly.
    #[test]
    fn parse_unicode() {
        test("[lib_parse] ⺐.", tree! {
            F(Function {
                name: "lib_parse",
                body: None
            }),
            S, W("⺐"), W(".")
        });
        test("[func123][Hello 🌍!]", tree! {
            F(Function {
                name: "func123",
                body: Some(tree! { W("Hello"), S, W("🌍"), W("!") }),
            })
        });
    }

    /// Tests whether errors get reported correctly.
    #[test]
    fn parse_errors() {
        test_err("No functions here]", ParseError {
            message: "unexpected closing bracket".to_owned(),
        });
        test_err("[hello][world", ParseError {
            message: "expected closing bracket".to_owned(),
        });
        test_err("[hello world", ParseError {
            message: "expected closing bracket".to_owned(),
        });
        test_err("[ no-name][Why?]", ParseError {
            message: "expected identifier".to_owned(),
        });
    }
}
|
375
src/pdf.rs
Normal file
375
src/pdf.rs
Normal file
@ -0,0 +1,375 @@
|
|||||||
|
//! Writing of documents in the _PDF_ format.
|
||||||
|
|
||||||
|
use std::io::{self, Write};
|
||||||
|
use crate::doc::{Document, Text, DocumentFont, Size};
|
||||||
|
|
||||||
|
|
||||||
|
/// A type that is a sink for types that can be written conforming
/// to the _PDF_ format (that may be things like sizes, other objects
/// or whole documents).
pub trait WritePdf<T> {
    /// Write self into a byte sink, returning how many bytes were written.
    ///
    /// NOTE(review): the implementations in this file disagree on the
    /// return value — the `Document` impl returns the writer's running
    /// total of bytes, while the `Size` impl returns only the bytes of
    /// that one write. Confirm which contract is intended.
    fn write_pdf(&mut self, object: &T) -> io::Result<usize>;
}
|
||||||
|
|
||||||
|
impl<W: Write> WritePdf<Document> for W {
    /// Serialize a whole document by delegating to a fresh `PdfWriter`
    /// that streams into `self`.
    fn write_pdf(&mut self, document: &Document) -> io::Result<usize> {
        PdfWriter::new(document).write(self)
    }
}
|
||||||
|
|
||||||
|
impl<W: Write> WritePdf<Size> for W {
    /// Write a size as its `points` value, stringified with no unit suffix
    /// (PDF coordinates are in points by default).
    fn write_pdf(&mut self, size: &Size) -> io::Result<usize> {
        self.write_str(size.points)
    }
}
|
||||||
|
|
||||||
|
/// A type that is a sink for types that can be converted to strings
/// and thus can be written string-like into a byte sink.
pub trait WriteByteString {
    /// Write the string-like type into self, returning how many
    /// bytes were written.
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize>;
}

impl<W: Write> WriteByteString for W {
    fn write_str<S: ToString>(&mut self, string_like: S) -> io::Result<usize> {
        let string = string_like.to_string();
        // Use `write_all` instead of a single `write`: `Write::write` is
        // allowed to perform a partial write, which would silently truncate
        // the output (e.g. cut a PDF object id in half) while the byte
        // counter stays consistent, producing a corrupt file with no error.
        self.write_all(string.as_bytes())?;
        Ok(string.len())
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Writes an abstract document into a byte sink in the _PDF_ format.
#[derive(Debug, Clone)]
struct PdfWriter<'d> {
    /// The document being serialized.
    doc: &'d Document,
    /// Running count of bytes written so far; doubles as the current
    /// byte offset recorded into the xref table.
    w: usize,
    /// Object id of the document catalog (always 1).
    catalog_id: u32,
    /// Object id of the page tree.
    page_tree_id: u32,
    /// First object id used for font resource objects.
    resources_start: u32,
    /// First object id used for page objects.
    pages_start: u32,
    /// First object id used for content stream objects.
    content_start: u32,
    /// Byte offsets of all written objects, in object-id order.
    xref_table: Vec<u32>,
    /// Byte offset at which the cross-reference table starts.
    offset_xref: u32,
}
|
||||||
|
|
||||||
|
impl<'d> PdfWriter<'d> {
|
||||||
|
/// Create a new pdf writer from a document.
|
||||||
|
fn new(doc: &'d Document) -> PdfWriter<'d> {
|
||||||
|
// Calculate unique ids for each object
|
||||||
|
let catalog_id: u32 = 1;
|
||||||
|
let page_tree_id = catalog_id + 1;
|
||||||
|
let pages_start = page_tree_id + 1;
|
||||||
|
let resources_start = pages_start + doc.pages.len() as u32;
|
||||||
|
let content_start = resources_start + doc.fonts.len() as u32;
|
||||||
|
|
||||||
|
PdfWriter {
|
||||||
|
doc,
|
||||||
|
catalog_id,
|
||||||
|
page_tree_id,
|
||||||
|
resources_start,
|
||||||
|
pages_start,
|
||||||
|
content_start,
|
||||||
|
w: 0,
|
||||||
|
xref_table: vec![],
|
||||||
|
offset_xref: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the document into a byte sink.
|
||||||
|
fn write<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.write_header(target)?;
|
||||||
|
|
||||||
|
self.write_document_catalog(target)?;
|
||||||
|
self.write_page_tree(target)?;
|
||||||
|
self.write_pages(target)?;
|
||||||
|
|
||||||
|
self.write_resources(target)?;
|
||||||
|
|
||||||
|
self.write_content(target)?;
|
||||||
|
// self.write_fonts(target)?;
|
||||||
|
|
||||||
|
self.write_xref_table(target)?;
|
||||||
|
self.write_trailer(target)?;
|
||||||
|
self.write_start_xref(target)?;
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the pdf header.
|
||||||
|
fn write_header<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
// Write the magic start
|
||||||
|
self.w += target.write(b"%PDF-1.7\n")?;
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the document catalog (contains general info about the document).
|
||||||
|
fn write_document_catalog<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.xref_table.push(self.w as u32);
|
||||||
|
|
||||||
|
self.w += target.write_str(self.catalog_id)?;
|
||||||
|
self.w += target.write(b" 0 obj\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
self.w += target.write(b"/Type /Catalog\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Pages ")?;
|
||||||
|
self.w += target.write_str(self.page_tree_id)?;
|
||||||
|
self.w += target.write(b" 0 R\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
self.w += target.write(b"endobj\n")?;
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the page tree (overview over the pages of a document).
|
||||||
|
fn write_page_tree<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.xref_table.push(self.w as u32);
|
||||||
|
|
||||||
|
// Create page tree
|
||||||
|
self.w += target.write_str(self.page_tree_id)?;
|
||||||
|
self.w += target.write(b" 0 obj\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
self.w += target.write(b"/Type /Pages\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Count ")?;
|
||||||
|
self.w += target.write_str(self.doc.pages.len())?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Kids [")?;
|
||||||
|
|
||||||
|
for id in self.pages_start .. self.pages_start + self.doc.pages.len() as u32 {
|
||||||
|
self.w += target.write_str(id)?;
|
||||||
|
self.w += target.write(b" 0 R ")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.w += target.write(b"]\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Resources\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Font\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
|
||||||
|
let mut font_id = self.resources_start;
|
||||||
|
for nr in 1 ..= self.doc.fonts.len() as u32 {
|
||||||
|
self.w += target.write(b"/F")?;
|
||||||
|
self.w += target.write_str(nr)?;
|
||||||
|
self.w += target.write(b" ")?;
|
||||||
|
self.w += target.write_str(font_id)?;
|
||||||
|
self.w += target.write(b" 0 R\n")?;
|
||||||
|
font_id += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
self.w += target.write(b"endobj\n")?;
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the page descriptions.
|
||||||
|
fn write_pages<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
let mut page_id = self.pages_start;
|
||||||
|
let mut content_id = self.content_start;
|
||||||
|
|
||||||
|
for page in &self.doc.pages {
|
||||||
|
self.xref_table.push(self.w as u32);
|
||||||
|
|
||||||
|
self.w += target.write_str(page_id)?;
|
||||||
|
self.w += target.write(b" 0 obj\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
self.w += target.write(b"/Type /Page\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Parent ")?;
|
||||||
|
self.w += target.write_str(self.page_tree_id)?;
|
||||||
|
self.w += target.write(b" 0 R\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/MediaBox [0 0 ")?;
|
||||||
|
self.w += target.write_pdf(&page.size[0])?;
|
||||||
|
self.w += target.write(b" ")?;
|
||||||
|
self.w += target.write_pdf(&page.size[1])?;
|
||||||
|
self.w += target.write(b"]\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Contents [")?;
|
||||||
|
|
||||||
|
for _ in &page.contents {
|
||||||
|
self.w += target.write_str(content_id)?;
|
||||||
|
self.w += target.write(b" 0 R ")?;
|
||||||
|
|
||||||
|
content_id += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.w += target.write(b"]\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
self.w += target.write(b"endobj\n")?;
|
||||||
|
|
||||||
|
page_id += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the resources used by the file (fonts and friends).
|
||||||
|
fn write_resources<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
let mut id = self.resources_start;
|
||||||
|
|
||||||
|
for font in &self.doc.fonts {
|
||||||
|
self.xref_table.push(self.w as u32);
|
||||||
|
|
||||||
|
self.w += target.write_str(id)?;
|
||||||
|
self.w += target.write(b" 0 obj\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
self.w += target.write(b"/Type /Font\n")?;
|
||||||
|
|
||||||
|
match font {
|
||||||
|
DocumentFont::Builtin(builtin) => {
|
||||||
|
self.w += target.write(b"/Subtype /Type1\n")?;
|
||||||
|
self.w += target.write(b"/BaseFont /")?;
|
||||||
|
self.w += target.write_str(builtin.name())?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
},
|
||||||
|
DocumentFont::Loaded(font) => {
|
||||||
|
self.w += target.write(b"/Subtype /TrueType\n")?;
|
||||||
|
self.w += target.write(b"/BaseFont /")?;
|
||||||
|
self.w += target.write_str(font.name.as_str())?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
unimplemented!();
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
self.w += target.write(b"endobj\n")?;
|
||||||
|
|
||||||
|
id += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the page contents.
|
||||||
|
fn write_content<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
let mut id = self.content_start;
|
||||||
|
|
||||||
|
for page in &self.doc.pages {
|
||||||
|
for content in &page.contents {
|
||||||
|
self.xref_table.push(self.w as u32);
|
||||||
|
|
||||||
|
self.w += target.write_str(id)?;
|
||||||
|
self.w += target.write(b" 0 obj\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
|
||||||
|
let mut buffer = Vec::new();
|
||||||
|
buffer.write(b"BT/\n")?;
|
||||||
|
|
||||||
|
buffer.write(b"/F1 13 Tf\n")?;
|
||||||
|
buffer.write(b"108 734 Td\n")?;
|
||||||
|
buffer.write(b"(")?;
|
||||||
|
|
||||||
|
let Text(string) = content;
|
||||||
|
buffer.write(string.as_bytes())?;
|
||||||
|
|
||||||
|
buffer.write(b") Tj\n")?;
|
||||||
|
buffer.write(b"ET\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Length ")?;
|
||||||
|
self.w += target.write_str(buffer.len())?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"stream\n")?;
|
||||||
|
self.w += target.write(&buffer)?;
|
||||||
|
self.w += target.write(b"endstream\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"endobj\n")?;
|
||||||
|
|
||||||
|
id += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the cross-reference table.
|
||||||
|
fn write_xref_table<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.offset_xref = self.w as u32;
|
||||||
|
|
||||||
|
self.w += target.write(b"xref\n")?;
|
||||||
|
self.w += target.write(b"0 ")?;
|
||||||
|
self.w += target.write_str(self.xref_table.len())?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"0000000000 65535 f\r\n")?;
|
||||||
|
|
||||||
|
for offset in &self.xref_table {
|
||||||
|
self.w += target.write(format!("{:010}", offset).as_bytes())?;
|
||||||
|
self.w += target.write(b" 00000 n")?;
|
||||||
|
self.w += target.write(b"\r\n")?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the trailer (points to the root object).
|
||||||
|
fn write_trailer<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.w += target.write(b"trailer\n")?;
|
||||||
|
self.w += target.write(b"<<\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Root ")?;
|
||||||
|
self.w += target.write_str(self.catalog_id)?;
|
||||||
|
self.w += target.write(b" 0 R\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b"/Size ")?;
|
||||||
|
self.w += target.write_str(self.xref_table.len() + 1)?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
|
||||||
|
self.w += target.write(b">>\n")?;
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write where the cross-reference table starts.
|
||||||
|
fn write_start_xref<W: Write>(&mut self, target: &mut W) -> io::Result<usize> {
|
||||||
|
self.w += target.write(b"startxref\n")?;
|
||||||
|
self.w += target.write_str(self.offset_xref)?;
|
||||||
|
self.w += target.write(b"\n")?;
|
||||||
|
|
||||||
|
Ok(self.w)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// End-to-end smoke tests: source text -> tokens -> syntax tree -> document
// -> PDF file on disk. These only check that the pipeline runs without
// panicking; the output files are inspected manually.
#[cfg(test)]
mod pdf_tests {
    use super::*;
    use crate::parsing::{Tokenize, Parse};
    use crate::doc::Generate;

    /// Create a pdf with a name from the source code.
    ///
    /// NOTE(review): writes real files at paths relative to the working
    /// directory ("../target/…") — this fails if that directory does not
    /// exist or tests run from a different location.
    fn test(name: &str, src: &str) {
        let mut file = std::fs::File::create(name).unwrap();
        let doc = src.tokenize()
            .parse().unwrap()
            .generate().unwrap();
        file.write_pdf(&doc).unwrap();
    }

    #[test]
    fn pdf_simple() {
        test("../target/write1.pdf", "This is an example of a sentence.");
        test("../target/write2.pdf","
            Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed
            diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam
            voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd
            gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor
            sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut
            labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et
            justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est
            Lorem ipsum dolor sit amet.
        ");
    }
}
|
138
src/utility.rs
Normal file
138
src/utility.rs
Normal file
@ -0,0 +1,138 @@
|
|||||||
|
//! Utility functionality.
|
||||||
|
|
||||||
|
use std::str::Split;
|
||||||
|
use std::iter::Peekable;
|
||||||
|
use unicode_xid::UnicodeXID;
|
||||||
|
|
||||||
|
|
||||||
|
/// Types that can be splined: split by a pattern, with a marker value
/// interleaved between the resulting substrings.
pub trait Splinor {
    /// Returns an iterator over the substrings splitted by the pattern,
    /// intertwined with the splinor.
    ///
    /// # Example
    ///
    /// ```
    /// # use typeset::utility::*;
    /// #[derive(Debug, Copy, Clone, PartialEq)]
    /// struct Space;
    ///
    /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
    /// assert_eq!(v, [
    ///     Splined::Value("My"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("airplane"),
    ///     Splined::Splinor(Space),
    ///     Splined::Value("flies!"),
    /// ]);
    /// ```
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
}

impl Splinor for str {
    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
        // Peekable so the iterator can tell whether another substring
        // follows and a splinor still has to be emitted between them.
        let split = self.split(pat).peekable();
        Spline {
            splinor: Splined::Splinor(splinor),
            split,
            yield_splinor: false,
        }
    }
}

/// Iterator over splitted values and splinors.
///
/// Created by the [`spline`](Splinor::spline) function.
#[derive(Debug, Clone)]
pub struct Spline<'s, T> {
    /// The splinor, pre-wrapped in `Splined` so each yield is a plain clone.
    splinor: Splined<'s, T>,
    /// The underlying substring iterator.
    split: Peekable<Split<'s, &'s str>>,
    /// Whether the next item to yield is the splinor (`true`) or a substring.
    yield_splinor: bool,
}

/// Represents either a splitted substring or a splinor.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Splined<'s, T> {
    /// A substring.
    Value(&'s str),
    /// An intertwined splinor.
    Splinor(T),
}

impl<'s, T: Clone> Iterator for Spline<'s, T> {
    type Item = Splined<'s, T>;

    fn next(&mut self) -> Option<Splined<'s, T>> {
        // Alternate between substrings and splinors, but never emit a
        // trailing splinor once the underlying split is exhausted.
        if self.yield_splinor && self.split.peek().is_some() {
            self.yield_splinor = false;
            Some(self.splinor.clone())
        } else {
            self.yield_splinor = true;
            self.split.next().map(Splined::Value)
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/// More useful functions on `str`'s.
|
||||||
|
pub trait StrExt {
|
||||||
|
/// Whether self consists only of whitespace.
|
||||||
|
fn is_whitespace(&self) -> bool;
|
||||||
|
|
||||||
|
/// Whether this word is a valid unicode identifier.
|
||||||
|
fn is_identifier(&self) -> bool;
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StrExt for str {
|
||||||
|
#[inline]
|
||||||
|
fn is_whitespace(&self) -> bool {
|
||||||
|
self.chars().all(|c| c.is_whitespace() && c != '\n')
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_identifier(&self) -> bool {
|
||||||
|
let mut chars = self.chars();
|
||||||
|
|
||||||
|
match chars.next() {
|
||||||
|
Some(c) if !UnicodeXID::is_xid_start(c) => return false,
|
||||||
|
None => return false,
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(c) = chars.next() {
|
||||||
|
if !UnicodeXID::is_xid_continue(c) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Unit tests for the `spline` iterator, covering interior, leading and
// trailing occurrences of the separator pattern.
#[cfg(test)]
mod splinor_tests {
    use super::*;
    use Splined::{Value as V, Splinor as S};

    /// Marker used as the interleaved splinor in the tests below.
    #[derive(Debug, Copy, Clone, PartialEq)]
    enum Token { DoubleUnderscore }

    /// Assert that splining `string` by `pat` yields exactly `vec`.
    fn test<T>(string: &str, pat: &str, splinor: T, vec: Vec<Splined<T>>)
    where T: std::fmt::Debug + Clone + PartialEq {
        assert_eq!(string.spline(pat, splinor).collect::<Vec<_>>(), vec);
    }

    #[test]
    fn splinor() {
        let s = S(Token::DoubleUnderscore);
        // Leading/trailing matches produce empty `Value("")` entries.
        test("__he__llo__world__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
        test("__Italic__", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Italic"), s, V("")]);
        test("Key__Value", "__", Token::DoubleUnderscore,
            vec![V("Key"), s, V("Value")]);
        test("__Start__NoEnd", "__", Token::DoubleUnderscore,
            vec![V(""), s, V("Start"), s, V("NoEnd")]);
        test("NoStart__End__", "__", Token::DoubleUnderscore,
            vec![V("NoStart"), s, V("End"), s, V("")]);
    }
}
|
Loading…
x
Reference in New Issue
Block a user