From b1e956419d94a0c3876891b3d6a4976cc4a3ab09 Mon Sep 17 00:00:00 2001
From: Laurenz <laurmaedje@gmail.com>
Date: Sat, 11 Jan 2020 10:11:14 +0100
Subject: [PATCH] =?UTF-8?q?Re-engineer=20tokenization=20=F0=9F=9A=BF?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/func/mod.rs         |   2 +-
 src/library/mod.rs      |   7 +-
 src/size.rs             |   2 +-
 src/style.rs            |  13 +-
 src/syntax/mod.rs       |  60 +--
 src/syntax/parsing.rs   | 835 +---------------------------------------
 src/syntax/span.rs      |   2 -
 src/syntax/tokens.rs    | 765 ++++++++++++++----------------------
 tests/parse.rs          |  21 +-
 tests/parsing/base.rs   |  78 ----
 tests/parsing/tokens.rs |  62 +++
 11 files changed, 398 insertions(+), 1449 deletions(-)
 delete mode 100644 tests/parsing/base.rs
 create mode 100644 tests/parsing/tokens.rs
diff --git a/src/func/mod.rs b/src/func/mod.rs
index 69f28e007..01c77327e 100644
--- a/src/func/mod.rs
+++ b/src/func/mod.rs
@@ -15,7 +15,7 @@ pub mod prelude {
     pub use crate::func::{Scope, ParseFunc, LayoutFunc, Command, Commands};
     pub use crate::layout::prelude::*;
     pub use crate::syntax::{
-        parse, ParseContext, ParseResult,
+        ParseContext, ParseResult,
         SyntaxTree, FuncCall, FuncArgs, PosArg, KeyArg,
         Expression, Ident, ExpressionKind,
         Spanned, Span
diff --git a/src/library/mod.rs b/src/library/mod.rs
index 013e99627..92c3c9488 100644
--- a/src/library/mod.rs
+++ b/src/library/mod.rs
@@ -297,9 +297,10 @@ function! {
         parse!(forbidden: body);
 
         if let Some(name) = args.get_pos_opt::<Ident>()? {
-            let flip = args.get_key_opt::<bool>("flip")?
-                .unwrap_or(false);
-            PageSizeFunc::Paper(Paper::from_name(name.as_str())?, flip)
+            let flip = args.get_key_opt::<bool>("flip")?.unwrap_or(false);
+            let paper = Paper::from_name(name.as_str())
+                .ok_or_else(|| error!(@"invalid paper name: `{}`", name))?;
+            PageSizeFunc::Paper(paper, flip)
         } else {
             PageSizeFunc::Custom(ExtentMap::new(&mut args, true)?)
         }
diff --git a/src/size.rs b/src/size.rs
index 5b84c2ad8..a5bc5d7ff 100644
--- a/src/size.rs
+++ b/src/size.rs
@@ -72,7 +72,7 @@ impl Size {
 
 impl Display for Size {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(f, "{}cm", self.to_cm())
+        write!(f, "{}pt", self.points)
     }
 }
 
diff --git a/src/style.rs b/src/style.rs
index e552a63d6..35de5da13 100644
--- a/src/style.rs
+++ b/src/style.rs
@@ -3,7 +3,6 @@
 use toddle::query::{FontFallbackTree, FontVariant, FontStyle, FontWeight};
 
 use crate::size::{Size, Size2D, SizeBox, ValueBox, PSize};
-use crate::syntax::ParseResult;
 
 
 /// Defines properties of pages and text.
@@ -157,7 +156,7 @@ pub struct Paper {
 
 impl Paper {
     /// The paper with the given name.
-    pub fn from_name(name: &str) -> ParseResult<Paper> {
+    pub fn from_name(name: &str) -> Option<Paper> {
         parse_paper(name)
     }
 }
@@ -193,11 +192,11 @@ macro_rules! papers {
             class: $class,
         };)*
 
-        fn parse_paper(paper: &str) -> ParseResult<Paper> {
-            Ok(match paper.to_lowercase().as_str() {
-                $($($patterns)* => $var,)*
-                _ => error!("unknown paper size: `{}`", paper),
-            })
+        fn parse_paper(paper: &str) -> Option<Paper> {
+            match paper.to_lowercase().as_str() {
+                $($($patterns)* => Some($var),)*
+                _ => None,
+            }
         }
     };
 }
diff --git a/src/syntax/mod.rs b/src/syntax/mod.rs
index b0cbcafae..10a509d2f 100644
--- a/src/syntax/mod.rs
+++ b/src/syntax/mod.rs
@@ -11,48 +11,6 @@ pub_use_mod!(parsing);
 pub_use_mod!(span);
 
 
-/// A logical unit of the incoming text stream.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Token<'s> {
-    /// One or more whitespace (non-newline) codepoints.
-    Space,
-    /// A line feed (`\n`, `\r\n` and some more as defined by the Unicode standard).
-    Newline,
-    /// A left bracket: `[`.
-    LeftBracket,
-    /// A right bracket: `]`.
-    RightBracket,
-    /// A colon (`:`) indicating the beginning of function arguments (Function
-    /// header only).
-    ///
-    /// If a colon occurs outside of a function header, it will be tokenized as
-    /// [Text](Token::Text), just like the other tokens annotated with
-    /// _Header only_.
-    Colon,
-    /// An equals (`=`) sign assigning a function argument a value (Header only).
-    Equals,
-    /// A comma (`,`) separating two function arguments (Header only).
-    Comma,
-    /// Quoted text as a string value (Header only).
-    Quoted(&'s str),
-    /// An underscore, indicating text in italics (Body only).
-    Underscore,
-    /// A star, indicating bold text (Body only).
-    Star,
-    /// A backtick, indicating monospace text (Body only).
-    Backtick,
-    /// A line comment.
-    LineComment(&'s str),
-    /// A block comment.
-    BlockComment(&'s str),
-    /// A star followed by a slash unexpectedly ending a block comment
-    /// (the comment was not started before, otherwise a
-    /// [BlockComment](Token::BlockComment) would be returned).
-    StarSlash,
-    /// Any consecutive string which does not contain markup.
-    Text(&'s str),
-}
-
 /// A tree representation of source code.
 #[derive(Debug, PartialEq)]
 pub struct SyntaxTree {
@@ -256,11 +214,11 @@ debug_display!(Expression);
 pub struct Ident(pub String);
 
 impl Ident {
-    pub fn new(string: String) -> ParseResult<Ident> {
-        if is_identifier(&string) {
-            Ok(Ident(string))
+    pub fn new<S>(ident: S) -> Option<Ident> where S: AsRef<str> + Into<String> {
+        if is_identifier(ident.as_ref()) {
+            Some(Ident(ident.into()))
         } else {
-            error!("invalid identifier: `{}`", string);
+            None
         }
     }
 
@@ -277,20 +235,20 @@ impl Display for Ident {
 
 debug_display!(Ident);
 
-/// Whether this word is a valid unicode identifier.
+/// Whether this word is a valid identifier.
 fn is_identifier(string: &str) -> bool {
     let mut chars = string.chars();
 
     match chars.next() {
-        Some('-') => (),
-        Some(c) if UnicodeXID::is_xid_start(c) => (),
+        Some('-') => {}
+        Some(c) if UnicodeXID::is_xid_start(c) => {}
         _ => return false,
     }
 
     while let Some(c) = chars.next() {
         match c {
-            '.' | '-' => (),
-            c if UnicodeXID::is_xid_continue(c) => (),
+            '.' | '-' => {}
+            c if UnicodeXID::is_xid_continue(c) => {}
             _ => return false,
         }
     }
diff --git a/src/syntax/parsing.rs b/src/syntax/parsing.rs
index dc39145ac..4a50ef963 100644
--- a/src/syntax/parsing.rs
+++ b/src/syntax/parsing.rs
@@ -1,7 +1,4 @@
-//! Parsing of token streams into syntax trees.
-
 use crate::func::Scope;
-use crate::size::Size;
 use super::*;
 
 
@@ -10,7 +7,7 @@ pub type ParseResult<T> = crate::TypesetResult<T>;
 
 /// Parses source code into a syntax tree given a context.
 pub fn parse(src: &str, ctx: ParseContext) -> ParseResult<SyntaxTree> {
-    Parser::new(src, ctx).parse()
+    unimplemented!()
 }
 
 /// The context for parsing.
@@ -19,833 +16,3 @@ pub struct ParseContext<'a> {
     /// The scope containing function definitions.
     pub scope: &'a Scope,
 }
-
-/// Transforms token streams into syntax trees.
-#[derive(Debug)]
-struct Parser<'s> {
-    src: &'s str,
-    tokens: PeekableTokens<'s>,
-    ctx: ParseContext<'s>,
-    tree: SyntaxTree,
-    color_tokens: Vec<Spanned<ColorToken>>,
-}
-
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum NewlineState {
-    /// No newline yet.
-    Zero,
-    /// We saw one newline with the given span already and are
-    /// looking for another.
-    One(Span),
-    /// We saw at least two newlines and wrote one, thus not
-    /// writing another one for more newlines.
-    TwoOrMore,
-}
-
-impl<'s> Parser<'s> {
-    /// Create a new parser from the source code and the context.
-    fn new(src: &'s str, ctx: ParseContext<'s>) -> Parser<'s> {
-        Parser {
-            src,
-            tokens: PeekableTokens::new(tokenize(src)),
-            ctx,
-            tree: SyntaxTree::new(),
-            color_tokens: vec![],
-        }
-    }
-
-    /// Parse the source into a syntax tree.
-    fn parse(mut self) -> ParseResult<SyntaxTree> {
-        while self.tokens.peek().is_some() {
-            self.parse_white()?;
-            self.parse_body_part()?;
-        }
-
-        Ok(self.tree)
-    }
-
-    /// Parse the next part of the body.
-    fn parse_body_part(&mut self) -> ParseResult<()> {
-        use Token::*;
-
-        if let Some(token) = self.tokens.peek() {
-            match token.v {
-                // Functions.
-                LeftBracket => self.parse_func()?,
-                RightBracket => error!("unexpected closing bracket"),
-
-                // Modifiers.
-                Underscore => self.add_consumed(Node::ToggleItalics, token.span),
-                Star => self.add_consumed(Node::ToggleBolder, token.span),
-                Backtick => self.add_consumed(Node::ToggleMonospace, token.span),
-
-                // Normal text.
-                Text(word) => self.add_consumed(Node::Text(word.to_owned()), token.span),
-
-                // The rest is handled elsewhere or should not happen, because
-                // the tokenizer does not yield these in a body.
-                Space | Newline | LineComment(_) | BlockComment(_) |
-                Colon | Equals | Comma | Quoted(_) | StarSlash
-                    => panic!("parse_body_part: unexpected token: {:?}", token),
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Parse a complete function from the current position.
-    fn parse_func(&mut self) -> ParseResult<()> {
-        // This should only be called if a left bracket was seen.
-        let token = self.tokens.next().expect("parse_func: expected token");
-        assert!(token.v == Token::LeftBracket);
-
-        self.add_color_token(ColorToken::Bracket, token.span);
-
-        let mut span = token.span;
-        let name = self.parse_func_name()?;
-
-        // Check for arguments
-        let args = match self.tokens.next() {
-            Some(Spanned { v: Token::RightBracket, span }) => {
-                self.add_color_token(ColorToken::Bracket, span);
-                FuncArgs::new()
-            },
-            Some(Spanned { v: Token::Colon, span }) => {
-                self.add_color_token(ColorToken::Colon, span);
-                self.parse_func_args()?
-            }
-            _ => error!("expected arguments or closing bracket"),
-        };
-
-        span.end = self.tokens.get_position();
-        let (func, body_span) = self.parse_func_call(name, args)?;
-
-        if let Some(body_span) = body_span {
-            span.expand(body_span);
-        }
-
-        // Finally this function is parsed to the end.
-        self.add(Node::Func(func), span);
-
-        Ok(())
-    }
-
-    /// Parse a function header.
-    fn parse_func_name(&mut self) -> ParseResult<Spanned<Ident>> {
-        self.skip_white();
-
-        let name = match self.tokens.next() {
-            Some(Spanned { v: Token::Text(word), span }) => {
-                let ident = Ident::new(word.to_string())?;
-                Spanned::new(ident, span)
-            }
-            _ => error!("expected identifier"),
-        };
-
-        self.add_color_token(ColorToken::FuncName, name.span);
-        self.skip_white();
-
-        Ok(name)
-    }
-
-    /// Parse the arguments to a function.
-    fn parse_func_args(&mut self) -> ParseResult<FuncArgs> {
-        let mut args = FuncArgs::new();
-
-        loop {
-            self.skip_white();
-
-            match self.parse_func_arg()? {
-                Some(DynArg::Pos(arg)) => args.add_pos(arg),
-                Some(DynArg::Key(arg)) => args.add_key(arg),
-                None => {},
-            }
-
-            match self.tokens.next() {
-                Some(Spanned { v: Token::Comma, span }) => {
-                    self.add_color_token(ColorToken::Comma, span);
-                }
-                Some(Spanned { v: Token::RightBracket, span }) => {
-                    self.add_color_token(ColorToken::Bracket, span);
-                    break;
-                }
-                _ => error!("expected comma or closing bracket"),
-            }
-        }
-
-        Ok(args)
-    }
-
-    /// Parse one argument to a function.
-    fn parse_func_arg(&mut self) -> ParseResult<Option<DynArg>> {
-        let token = match self.tokens.peek() {
-            Some(token) => token,
-            None => return Ok(None),
-        };
-
-        Ok(match token.v {
-            Token::Text(name) => {
-                self.advance();
-                self.skip_white();
-
-                Some(match self.tokens.peek() {
-                    Some(Spanned { v: Token::Equals, span }) => {
-                        self.advance();
-                        self.skip_white();
-
-                        let name = Ident::new(name.to_string())?;
-                        let key = Spanned::new(name, token.span);
-
-                        self.add_color_token(ColorToken::KeyArg, key.span);
-                        self.add_color_token(ColorToken::Equals, span);
-
-                        let next = self.tokens.next()
-                            .ok_or_else(|| error!(@"expected expression"))?;
-
-                        let value = Self::parse_expression(next)?;
-
-                        self.add_expr_token(&value);
-
-                        let span = Span::merge(key.span, value.span);
-                        let arg = KeyArg { key, value };
-
-                        DynArg::Key(Spanned::new(arg, span))
-                    }
-
-                    _ => {
-                        let expr = Self::parse_expression(token)?;
-                        self.add_expr_token(&expr);
-                        DynArg::Pos(expr)
-                    }
-                })
-            }
-
-            Token::Quoted(_) => {
-                self.advance();
-                self.skip_white();
-
-                self.add_color_token(ColorToken::ExprStr, token.span);
-
-                Some(DynArg::Pos(Self::parse_expression(token)?))
-            }
-
-            _ => None,
-        })
-    }
-
-    /// Parse a function call.
-    fn parse_func_call(&mut self, name: Spanned<Ident>, args: FuncArgs)
-    -> ParseResult<(FuncCall, Option<Span>)> {
-        // Now we want to parse this function dynamically.
-        let parser = self
-            .ctx
-            .scope
-            .get_parser(&name.v.0)
-            .ok_or_else(|| error!(@"unknown function: `{}`", &name.v))?;
-
-        let has_body = self.tokens.peek().map(Spanned::value) == Some(Token::LeftBracket);
-
-        // Do the parsing dependent on whether the function has a body.
-        Ok(if has_body {
-            self.advance();
-
-            // Find out the string which makes the body of this function.
-            let start_index = self.tokens.string_index();
-            let mut start_pos = self.tokens.get_position();
-            start_pos.column -= 1;
-
-            let (mut end_index, mut end_pos) =
-                find_closing_bracket(&self.src[start_index..])
-                    .ok_or_else(|| error!(@"expected closing bracket"))?;
-
-            end_index += start_index;
-            end_pos.column += 1;
-
-            let span = Span::new(start_pos, end_pos);
-
-            // Parse the body.
-            let body_string = &self.src[start_index..end_index];
-            let body = parser(args, Some(body_string), self.ctx)?;
-
-            // Skip to the end of the function in the token stream.
-            self.tokens.set_string_index(end_index);
-
-            // Now the body should be closed.
-            let token = self.tokens.next().expect("parse_func_body: expected token");
-            assert!(token.v == Token::RightBracket);
-
-            (FuncCall(body), Some(span))
-        } else {
-            (FuncCall(parser(args, None, self.ctx)?), None)
-        })
-    }
-
-    /// Parse an expression.
-    fn parse_expression(token: Spanned<Token>) -> ParseResult<Spanned<Expression>> {
-        Ok(Spanned::new(match token.v {
-            Token::Quoted(text) => Expression::Str(text.to_owned()),
-            Token::Text(text) => {
-                if let Ok(b) = text.parse::<bool>() {
-                    Expression::Bool(b)
-                } else if let Ok(num) = text.parse::<f64>() {
-                    Expression::Num(num)
-                } else if let Ok(size) = text.parse::<Size>() {
-                    Expression::Size(size)
-                } else {
-                    // This loop does not actually loop, but is used for breaking.
-                    loop {
-                        if text.ends_with('%') {
-                            if let Ok(percent) = text[.. text.len()-1].parse::<f64>() {
-                                break Expression::Num(percent / 100.0);
-                            }
-                        }
-
-                        break Expression::Ident(Ident::new(text.to_string())?);
-                    }
-                }
-            }
-            _ => error!("expected expression"),
-        }, token.span))
-    }
-
-    /// Parse whitespace (as long as there is any) and skip over comments.
-    fn parse_white(&mut self) -> ParseResult<()> {
-        let mut state = NewlineState::Zero;
-
-        while let Some(token) = self.tokens.peek() {
-            match token.v {
-                Token::Space => {
-                    self.advance();
-                    match state {
-                        NewlineState::Zero | NewlineState::TwoOrMore => {
-                            self.add_space(token.span);
-                        }
-                        _ => {}
-                    }
-                }
-
-                Token::Newline => {
-                    self.advance();
-                    match state {
-                        NewlineState::Zero => state = NewlineState::One(token.span),
-                        NewlineState::One(span) => {
-                            self.add(Node::Newline, Span::merge(span, token.span));
-                            state = NewlineState::TwoOrMore;
-                        },
-                        NewlineState::TwoOrMore => self.add_space(token.span),
-                    }
-                }
-
-                _ => {
-                    if let NewlineState::One(span) = state {
-                        self.add_space(Span::new(span.start, token.span.start));
-                    }
-
-                    state = NewlineState::Zero;
-                    match token.v {
-                        Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
-                        Token::StarSlash => error!("unexpected end of block comment"),
-                        _ => break,
-                    }
-                }
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Skip over whitespace and comments.
-    fn skip_white(&mut self) {
-        while let Some(token) = self.tokens.peek() {
-            match token.v {
-                Token::Space | Token::Newline |
-                Token::LineComment(_) | Token::BlockComment(_) => self.advance(),
-                _ => break,
-            }
-        }
-    }
-
-    /// Advance the iterator by one step.
-    fn advance(&mut self) {
-        self.tokens.next();
-    }
-
-    /// Append a node to the tree.
-    fn add(&mut self, node: Node, span: Span) {
-        self.tree.nodes.push(Spanned::new(node, span));
-    }
-
-    /// Append a space, merging with a previous space if there is one.
-    fn add_space(&mut self, span: Span) {
-        match self.tree.nodes.last_mut() {
-            Some(ref mut node) if node.v == Node::Space => node.span.expand(span),
-            _ => self.add(Node::Space, span),
-        }
-    }
-
-    /// Advance and return the given node.
-    fn add_consumed(&mut self, node: Node, span: Span) {
-        self.advance();
-        self.add(node, span);
-    }
-
-    /// Add a color token to the list.
-    fn add_color_token(&mut self, token: ColorToken, span: Span) {
-        self.color_tokens.push(Spanned::new(token, span));
-    }
-
-    /// Add a color token for an expression.
-    fn add_expr_token(&mut self, expr: &Spanned<Expression>) {
-        let kind = match expr.v {
-            Expression::Bool(_) => ColorToken::ExprBool,
-            Expression::Ident(_) => ColorToken::ExprIdent,
-            Expression::Num(_) => ColorToken::ExprNumber,
-            Expression::Size(_) => ColorToken::ExprSize,
-            Expression::Str(_) => ColorToken::ExprStr,
-        };
-
-        self.add_color_token(kind, expr.span);
-    }
-}
-
-/// Find the index of the first unbalanced and unescaped closing bracket.
-fn find_closing_bracket(src: &str) -> Option<(usize, Position)> {
-    let mut parens = 0;
-    let mut escaped = false;
-    let mut line = 1;
-    let mut line_start_index = 0;
-
-    for (index, c) in src.char_indices() {
-        match c {
-            '\\' => {
-                escaped = !escaped;
-                continue;
-            }
-            c if is_newline_char(c) => {
-                line += 1;
-                line_start_index = index + c.len_utf8();
-            }
-            ']' if !escaped && parens == 0 => {
-                let position = Position {
-                    line,
-                    column: index - line_start_index,
-                };
-
-                return Some((index, position))
-            }
-            '[' if !escaped => parens += 1,
-            ']' if !escaped => parens -= 1,
-            _ => {}
-        }
-        escaped = false;
-    }
-    None
-}
-
-/// A peekable iterator for tokens which allows access to the original iterator
-/// inside this module (which is needed by the parser).
-#[derive(Debug, Clone)]
-struct PeekableTokens<'s> {
-    tokens: Tokens<'s>,
-    peeked: Option<Option<Spanned<Token<'s>>>>,
-}
-
-impl<'s> PeekableTokens<'s> {
-    /// Create a new iterator from a string.
-    fn new(tokens: Tokens<'s>) -> PeekableTokens<'s> {
-        PeekableTokens {
-            tokens,
-            peeked: None,
-        }
-    }
-
-    /// Peek at the next element.
-    fn peek(&mut self) -> Option<Spanned<Token<'s>>> {
-        let iter = &mut self.tokens;
-        *self.peeked.get_or_insert_with(|| iter.next())
-    }
-
-    fn get_position(&self) -> Position {
-        match self.peeked {
-            Some(Some(peeked)) => peeked.span.start,
-            _ => self.tokens.get_position(),
-        }
-    }
-
-    fn string_index(&self) -> usize {
-        match self.peeked {
-            Some(Some(peeked)) => peeked.span.start.line,
-            _ => self.tokens.string_index(),
-        }
-    }
-
-    fn set_string_index(&mut self, index: usize) {
-        self.tokens.set_string_index(index);
-        self.peeked = None;
-    }
-}
-
-impl<'s> Iterator for PeekableTokens<'s> {
-    type Item = Spanned<Token<'s>>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        match self.peeked.take() {
-            Some(value) => value,
-            None => self.tokens.next(),
-        }
-    }
-}
-
-
-#[cfg(test)]
-#[allow(non_snake_case)]
-mod tests {
-    use crate::func::{Commands, Scope};
-    use crate::layout::{LayoutContext, LayoutResult};
-    use crate::syntax::*;
-    use Node::{Func as F, Newline as N, Space as S};
-
-    function! {
-        /// A testing function which just parses it's body into a syntax
-        /// tree.
-        #[derive(Debug)]
-        pub struct TreeFn { pub tree: SyntaxTree }
-
-        parse(args, body, ctx) {
-            args.clear();
-            TreeFn {
-                tree: parse!(expected: body, ctx)
-            }
-        }
-
-        layout() { vec![] }
-    }
-
-    impl PartialEq for TreeFn {
-        fn eq(&self, other: &TreeFn) -> bool {
-            assert_tree_equal(&self.tree, &other.tree);
-            true
-        }
-    }
-
-    function! {
-        /// A testing function without a body.
-        #[derive(Debug, Default, PartialEq)]
-        pub struct BodylessFn(Vec<Expression>, Vec<(Ident, Expression)>);
-
-        parse(args, body) {
-            parse!(forbidden: body);
-            BodylessFn(
-                args.pos().map(Spanned::value).collect(),
-                args.keys().map(|arg| (arg.v.key.v, arg.v.value.v)).collect(),
-            )
-        }
-
-        layout() { vec![] }
-    }
-
-    mod args {
-        use super::*;
-        use super::Expression;
-        pub use Expression::{Num as N, Size as Z, Bool as B};
-
-        pub fn S(string: &str) -> Expression { Expression::Str(string.to_owned()) }
-        pub fn I(string: &str) -> Expression {
-            Expression::Ident(Ident::new(string.to_owned()).unwrap())
-        }
-    }
-
-    /// Asserts that two syntax trees are equal except for all spans inside them.
-    fn assert_tree_equal(a: &SyntaxTree, b: &SyntaxTree) {
-        for (x, y) in a.nodes.iter().zip(&b.nodes) {
-            if x.v != y.v {
-                panic!("trees are not equal: ({:#?}) != ({:#?})", x.v, y.v);
-            }
-        }
-    }
-
-    /// Test if the source code parses into the syntax tree.
-    fn test(src: &str, tree: SyntaxTree) {
-        let ctx = ParseContext {
-            scope: &Scope::new(),
-        };
-        assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
-    }
-
-    /// Test with a scope containing function definitions.
-    fn test_scoped(scope: &Scope, src: &str, tree: SyntaxTree) {
-        let ctx = ParseContext { scope };
-        assert_tree_equal(&parse(src, ctx).unwrap(), &tree);
-    }
-
-    /// Test if the source parses into the error.
-    fn test_err(src: &str, err: &str) {
-        let ctx = ParseContext {
-            scope: &Scope::new(),
-        };
-        assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
-    }
-
-    /// Test with a scope if the source parses into the error.
-    fn test_err_scoped(scope: &Scope, src: &str, err: &str) {
-        let ctx = ParseContext { scope };
-        assert_eq!(parse(src, ctx).unwrap_err().to_string(), err);
-    }
-
-    fn test_color(scope: &Scope, src: &str, tokens: Vec<(usize, usize, ColorToken)>) {
-        let ctx = ParseContext { scope };
-        let tree = parse(src, ctx).unwrap();
-        // assert_eq!(tree.tokens,
-        //     tokens.into_iter()
-        //         .map(|(s, e, t)| Spanned::new(t, Span::new(s, e)))
-        //         .collect::<Vec<_>>()
-        // );
-    }
-
-    /// Create a text node.
-    fn T(s: &str) -> Node {
-        Node::Text(s.to_owned())
-    }
-
-    fn zerospan<T>(val: T) -> Spanned<T> {
-        Spanned::new(val, Span::new(Position::new(0, 0), Position::new(0, 0)))
-    }
-
-    /// Shortcut macro to create a syntax tree. Is `vec`-like and the elements
-    /// are the nodes without spans.
-    macro_rules! tree {
-        ($($x:expr),*) => ({
-            #[allow(unused_mut)] let mut nodes = vec![];
-            $(
-                nodes.push(zerospan($x));
-            )*
-            SyntaxTree { nodes }
-        });
-        ($($x:expr,)*) => (tree![$($x),*])
-    }
-
-    /// Shortcut macro to create a function.
-    macro_rules! func {
-        () => (
-            FuncCall(Box::new(BodylessFn(vec![], vec![])))
-        );
-        (body: $tree:expr $(,)*) => (
-            FuncCall(Box::new(TreeFn { tree: $tree }))
-        );
-        (args: $pos:expr, $key:expr) => (
-            FuncCall(Box::new(BodylessFn($pos, $key)))
-        );
-    }
-
-    /// Parse the basic cases.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_base() {
-        test("", tree! []);
-        test("Hello World!", tree! [ T("Hello"), S, T("World!") ]);
-    }
-
-    /// Test whether newlines generate the correct whitespace.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_newlines_whitespace() {
-        test("Hello\nWorld", tree! [ T("Hello"), S, T("World") ]);
-        test("Hello \n World", tree! [ T("Hello"), S, T("World") ]);
-        test("Hello\n\nWorld", tree! [ T("Hello"), N, T("World") ]);
-        test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
-        test("Hello\n\n  World", tree! [ T("Hello"), N, S, T("World") ]);
-        test("Hello \n \n \n  World", tree! [ T("Hello"), S, N, S, T("World") ]);
-        test("Hello\n \n\n  World", tree! [ T("Hello"), N, S, T("World") ]);
-        test("Hello\n \nWorld", tree! [ T("Hello"), N, T("World") ]);
-    }
-
-    /// Parse things dealing with functions.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_functions() {
-        let mut scope = Scope::new();
-        scope.add::<BodylessFn>("test");
-        scope.add::<BodylessFn>("end");
-        scope.add::<TreeFn>("modifier");
-        scope.add::<TreeFn>("func");
-
-        test_scoped(&scope,"[test]", tree! [ F(func! {}) ]);
-        test_scoped(&scope,"[ test]", tree! [ F(func! {}) ]);
-        test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [
-            T("This"), S, T("is"), S, T("an"), S,
-            F(func! { body: tree! [ T("example") ] }), S,
-            T("of"), S, T("a"), S, T("function"), S, T("invocation.")
-        ]);
-        test_scoped(&scope, "[func][Hello][modifier][Here][end]",  tree! [
-            F(func! { body: tree! [ T("Hello") ] }),
-            F(func! { body: tree! [ T("Here") ] }),
-            F(func! {}),
-        ]);
-        test_scoped(&scope, "[func][]", tree! [ F(func! { body: tree! [] }) ]);
-        test_scoped(&scope, "[modifier][[func][call]] outside", tree! [
-            F(func! { body: tree! [ F(func! { body: tree! [ T("call") ] }) ] }), S, T("outside")
-        ]);
-
-    }
-
-    /// Parse functions with arguments.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_function_args() {
-        use args::*;
-
-        fn func(
-            pos: Vec<Expression>,
-            key: Vec<(&str, Expression)>,
-        ) -> SyntaxTree {
-            let key = key.into_iter()
-                .map(|s| (Ident::new(s.0.to_string()).unwrap(), s.1))
-                .collect();
-
-            tree! [ F(func!(args: pos, key)) ]
-        }
-
-        let mut scope = Scope::new();
-        scope.add::<BodylessFn>("align");
-
-        test_scoped(&scope, "[align: left]", func(vec![I("left")], vec![]));
-        test_scoped(&scope, "[align: left,right]", func(vec![I("left"), I("right")], vec![]));
-        test_scoped(&scope, "[align: left, right]", func(vec![I("left"), I("right")], vec![]));
-        test_scoped(&scope, "[align: \"hello\"]", func(vec![S("hello")], vec![]));
-        test_scoped(&scope, r#"[align: "hello\"world"]"#, func(vec![S(r#"hello\"world"#)], vec![]));
-        test_scoped(&scope, "[align: 12]", func(vec![N(12.0)], vec![]));
-        test_scoped(&scope, "[align: 17.53pt]", func(vec![Z(Size::pt(17.53))], vec![]));
-        test_scoped(&scope, "[align: 2.4in]", func(vec![Z(Size::inches(2.4))], vec![]));
-        test_scoped(&scope, "[align: true, 10mm, left, \"hi, there\"]",
-            func(vec![B(true), Z(Size::mm(10.0)), I("left"), S("hi, there")], vec![]));
-
-        test_scoped(&scope, "[align: right=true]", func(vec![], vec![("right", B(true))]));
-        test_scoped(&scope, "[align: flow = horizontal]",
-            func(vec![], vec![("flow", I("horizontal"))]));
-        test_scoped(&scope, "[align: x=1cm, y=20mm]",
-            func(vec![], vec![("x", Z(Size::cm(1.0))), ("y", Z(Size::mm(20.0)))]));
-        test_scoped(&scope, "[align: x=5.14,a, \"b\", c=me,d=you]",
-            func(vec![I("a"), S("b")], vec![("x", N(5.14)), ("c", I("me")), ("d", I("you"))]));
-    }
-
-    /// Parse comments (line and block).
-    #[test]
-    #[rustfmt::skip]
-    fn parse_comments() {
-        let mut scope = Scope::new();
-        scope.add::<BodylessFn>("test");
-        scope.add::<TreeFn>("func");
-
-        test_scoped(&scope, "Text\n// Comment\n More text",
-            tree! [ T("Text"), S, T("More"), S, T("text") ]);
-        test_scoped(&scope, "[test/*world*/]",
-            tree! [ F(func! {}) ]);
-        test_scoped(&scope, "[test/*]*/]",
-            tree! [ F(func! {}) ]);
-    }
-
-    /// Test if escaped, but unbalanced parens are correctly parsed.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_unbalanced_body_parens() {
-        let mut scope = Scope::new();
-        scope.add::<TreeFn>("code");
-
-        test_scoped(&scope, r"My [code][Close \]] end", tree! [
-            T("My"), S, F(func! { body: tree! [ T("Close"), S, T("]") ] }), S, T("end")
-        ]);
-        test_scoped(&scope, r"My [code][\[ Open] end", tree! [
-            T("My"), S, F(func! { body: tree! [ T("["), S, T("Open") ] }), S, T("end")
-        ]);
-        test_scoped(&scope, r"My [code][Open \]  and  \[ close]end", tree! [
-            T("My"), S, F(func! { body:
-                tree! [ T("Open"), S, T("]"), S, T("and"), S, T("["), S, T("close") ]
-            }), T("end")
-        ]);
-    }
-
-    /// Tests if the parser handles non-ASCII stuff correctly.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_unicode() {
-        let mut scope = Scope::new();
-        scope.add::<BodylessFn>("func");
-        scope.add::<TreeFn>("bold");
-
-        test_scoped(&scope, "[func] ⺐.", tree! [ F(func! {}), S, T("⺐.") ]);
-        test_scoped(&scope, "[bold][Hello 🌍!]", tree! [
-            F(func! { body: tree! [ T("Hello"), S, T("🌍!") ] })
-        ]);
-    }
-
-    /// Tests whether spans get calculated correctly.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_spans() {
-        fn test_span(src: &str, correct: Vec<(usize, usize, usize, usize)>) {
-            let mut scope = Scope::new();
-            scope.add::<TreeFn>("hello");
-            let tree = parse(src, ParseContext { scope: &scope }).unwrap();
-            let spans = tree.nodes.into_iter()
-                .map(|node| {
-                    let Span { start, end } = node.span;
-                    (start.line, start.column, end.line, end.column)
-                })
-                .collect::<Vec<_>>();
-
-            assert_eq!(spans, correct);
-        }
-
-        test_span("hello world", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
-        test_span("p1\n \np2", vec![(1, 0, 1, 2), (1, 2, 2, 2), (3, 0, 3, 2)]);
-
-        let src = "func\n [hello: pos, other][body\r\n _🌍_\n]";
-        test_span(src, vec![
-            (1, 0, 1, 4),
-            (1, 4, 2, 1),
-            (2, 1, 4, 1)
-        ]);
-    }
-
-    /// Tests whether errors get reported correctly.
-    #[test]
-    #[rustfmt::skip]
-    fn parse_errors() {
-        let mut scope = Scope::new();
-        scope.add::<TreeFn>("hello");
-
-        test_err("No functions here]", "unexpected closing bracket");
-        test_err_scoped(&scope, "[hello][world", "expected closing bracket");
-        test_err("[hello world", "expected arguments or closing bracket");
-        test_err("[ no^name][Why?]", "invalid identifier: `no^name`");
-        test_err("Hello */", "unexpected end of block comment");
-    }
-
-    /// Tests syntax highlighting.
-    #[test]
-    #[rustfmt::skip]
-    fn test_highlighting() {
-        use ColorToken::{Bracket as B, FuncName as F, *};
-
-        let mut scope = Scope::new();
-        scope.add::<BodylessFn>("func");
-        scope.add::<TreeFn>("tree");
-
-        test_color(&scope, "[func]", vec![(0, 1, B), (1, 5, F), (5, 6, B)]);
-        test_color(&scope, "[func: 12pt]", vec![
-            (0, 1, B), (1, 5, F), (5, 6, Colon), (7, 11, ExprSize), (11, 12, B)
-        ]);
-        test_color(&scope, "[func: x=25.3, y=\"hi\"]", vec![
-            (0, 1, B), (1, 5, F), (5, 6, Colon),
-            (7, 8, KeyArg), (8, 9, Equals), (9, 13, ExprNumber),
-            (13, 14, Comma),
-            (15, 16, KeyArg), (16, 17, Equals), (17, 21, ExprStr),
-            (21, 22, B),
-        ]);
-
-        test_color(&scope, "Hello [tree][With [func: 3]]", vec![
-            (6, 7, B), (7, 11, F), (11, 12, B),
-            (12, 13, B), (18, 19, B)
-        ]);
-    }
-}
diff --git a/src/syntax/span.rs b/src/syntax/span.rs
index bc7001a96..bbb6a2061 100644
--- a/src/syntax/span.rs
+++ b/src/syntax/span.rs
@@ -45,8 +45,6 @@ impl Span {
     }
 
     pub fn merge(a: Span, b: Span) -> Span {
-        let start = a.start.min(b.start);
-
         Span {
             start: a.start.min(b.start),
             end: a.end.max(b.end),
diff --git a/src/syntax/tokens.rs b/src/syntax/tokens.rs
index cf37fe483..efcd1fc0a 100644
--- a/src/syntax/tokens.rs
+++ b/src/syntax/tokens.rs
@@ -1,88 +1,87 @@
-//! Tokenization of source code.
-
-use std::str::CharIndices;
-use smallvec::SmallVec;
+use std::iter::Peekable;
+use std::str::Chars;
 
 use super::*;
+use Token::*;
+use State::*;
 
 
-/// Builds an iterator over the tokens of the source code.
 pub fn tokenize(src: &str) -> Tokens {
     Tokens::new(src)
 }
 
-/// An iterator over the tokens of source code.
-#[derive(Debug, Clone)]
-pub struct Tokens<'s> {
-    src: &'s str,
-    chars: PeekableChars<'s>,
-    state: TokensState,
-    stack: SmallVec<[TokensState; 1]>,
-    line: usize,
-    line_start_index: usize,
+/// A minimal semantic entity of source code.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Token<'s> {
+    /// One or more whitespace characters. The contained `usize` denotes the
+    /// number of newlines that were contained in the whitespace.
+    Whitespace(usize),
+
+    /// A line comment with inner string contents `//<&'s str>\n`.
+    LineComment(&'s str),
+    /// A block comment with inner string contents `/*<&'s str>*/`. The comment
+    /// can contain nested block comments.
+    BlockComment(&'s str),
+    /// An erroneous `*/` without an opening block comment.
+    StarSlash,
+
+    /// A left bracket: `[`.
+    LeftBracket,
+    /// A right bracket: `]`.
+    RightBracket,
+
+    /// A left parenthesis in a function header: `(`.
+    LeftParen,
+    /// A right parenthesis in a function header: `)`.
+    RightParen,
+    /// A left brace in a function header: `{`.
+    LeftBrace,
+    /// A right brace in a function header: `}`.
+    RightBrace,
+
+    /// A colon in a function header: `:`.
+    Colon,
+    /// A comma in a function header: `,`.
+    Comma,
+    /// An equals sign in a function header: `=`.
+    Equals,
+
+    /// An expression in a function header.
+    Expr(Expression),
+
+    /// A star in body-text.
+    Star,
+    /// An underscore in body-text.
+    Underscore,
+    /// A backtick in body-text.
+    Backtick,
+
+    /// Any other consecutive string.
+    Text(&'s str),
+}
+
+/// An iterator over the tokens of a string of source code.
+pub struct Tokens<'s> {
+    src: &'s str,
+    chars: Characters<'s>,
+    state: State,
+    stack: Vec<State>,
 }
 
-/// The state the tokenizer is in.
 #[derive(Debug, Copy, Clone, Eq, PartialEq)]
-enum TokensState {
-    /// The base state if there is nothing special we are in.
+enum State {
+    Header,
+    StartBody,
     Body,
-    /// Inside a function header. Here colons and equal signs get parsed
-    /// as distinct tokens rather than text.
-    Function,
-    /// We expect either the end of the function or the beginning of the body.
-    MaybeBody,
 }
 
 impl<'s> Tokens<'s> {
-    /// Create a new token stream from source code.
     pub fn new(src: &'s str) -> Tokens<'s> {
         Tokens {
             src,
-            chars: PeekableChars::new(src),
-            state: TokensState::Body,
-            stack: SmallVec::new(),
-            line: 1,
-            line_start_index: 0,
-        }
-    }
-
-    /// The index of the first character of the next token in the source string.
-    pub fn string_index(&self) -> usize {
-        self.chars.string_index()
-    }
-
-    /// Go to a new position in the underlying string.
-    pub fn set_string_index(&mut self, index: usize) {
-        self.chars.set_string_index(index);
-    }
-
-    /// The current position in the source.
-    pub fn get_position(&self) -> Position {
-        self.line_position(self.string_index())
-    }
-
-    /// Advance the iterator by one step.
-    fn advance(&mut self) {
-        self.chars.next();
-    }
-
-    /// Switch to the given state.
-    fn switch(&mut self, state: TokensState) {
-        self.stack.push(self.state);
-        self.state = state;
-    }
-
-    /// Go back to the top-of-stack state.
-    fn unswitch(&mut self) {
-        self.state = self.stack.pop().unwrap_or(TokensState::Body);
-    }
-
-    /// The `Position` with line and column for a string index.
-    fn line_position(&self, index: usize) -> Position {
-        Position {
-            line: self.line,
-            column: index - self.line_start_index,
+            chars: Characters::new(src),
+            state: State::Body,
+            stack: vec![],
         }
     }
 }
@@ -90,455 +89,281 @@ impl<'s> Tokens<'s> {
 impl<'s> Iterator for Tokens<'s> {
     type Item = Spanned<Token<'s>>;
 
-    /// Advance the iterator, return the next token or nothing.
-    fn next(&mut self) -> Option<Self::Item> {
-        use TokensState as TS;
+    /// Parse the next token in the source code.
+    fn next(&mut self) -> Option<Spanned<Token<'s>>> {
+        let start = self.chars.position();
+        let first = self.chars.next()?;
+        let second = self.chars.peek();
 
-        // Go to the body state if the function has a body or return to the top-of-stack
-        // state.
-        if self.state == TS::MaybeBody {
-            if let Some((index, '[')) = self.chars.peek() {
-                self.advance();
-                self.state = TS::Body;
-                let span = Span::at(self.line_position(index));
-                return Some(Spanned::new(Token::LeftBracket, span));
-            } else {
-                self.unswitch();
-            }
-        }
+        let token = match first {
+            // Comments.
+            '/' if second == Some('/') => self.parse_line_comment(),
+            '/' if second == Some('*') => self.parse_block_comment(),
+            '*' if second == Some('/') => { self.eat(); StarSlash }
 
-        // Take the next char and peek at the one behind.
-        let (pos, next) = self.chars.next()?;
-        let afterwards = self.chars.peekc();
+            // Whitespace.
+            c if c.is_whitespace() => self.parse_whitespace(c),
 
-        /// The index at which the line ended, if it did.
-        let mut eol = None;
-
-        let token = match next {
-            // Functions
-            '[' => {
-                self.switch(TS::Function);
-                Token::LeftBracket
-            }
+            // Functions.
+            '[' => { self.set_state(Header); LeftBracket }
             ']' => {
-                if self.state == TS::Function {
-                    self.state = TS::MaybeBody;
+                if self.state == Header && second == Some('[') {
+                    self.state = StartBody;
                 } else {
-                    self.unswitch();
+                    self.pop_state();
                 }
 
-                Token::RightBracket
+                RightBracket
             }
 
-            // Line comment
-            '/' if afterwards == Some('/') => {
-                let start = self.string_index() + 1;
+            // Syntactic elements in function headers.
+            '(' if self.state == Header => LeftParen,
+            ')' if self.state == Header => RightParen,
+            '{' if self.state == Header => LeftBrace,
+            '}' if self.state == Header => RightBrace,
+            ':' if self.state == Header => Colon,
+            ',' if self.state == Header => Comma,
+            '=' if self.state == Header => Equals,
 
-                while let Some(c) = self.chars.peekc() {
-                    if is_newline_char(c) {
-                        break;
-                    }
-                    self.advance();
-                }
+            // String values.
+            '"' if self.state == Header => self.parse_string(),
 
-                let end = self.string_index();
-                Token::LineComment(&self.src[start..end])
-            }
+            // Style toggles.
+            '*' if self.state == Body => Star,
+            '_' if self.state == Body => Underscore,
+            '`' if self.state == Body => Backtick,
 
-            // Block comment
-            '/' if afterwards == Some('*') => {
-                let start = self.string_index() + 1;
-                let mut nested = 0;
+            // An escaped thing.
+            '\\' => self.parse_escaped(),
 
-                while let Some((_, c)) = self.chars.next() {
-                    let after = self.chars.peekc();
-                    match (c, after) {
-                        ('*', Some('/')) if nested == 0 => {
-                            self.advance();
-                            break;
-                        }
-                        ('/', Some('*')) => {
-                            self.advance();
-                            nested += 1
-                        }
-                        ('*', Some('/')) => {
-                            self.advance();
-                            nested -= 1
-                        }
-                        _ => {}
-                    }
-                }
-
-                let end = self.string_index() - 2;
-                Token::BlockComment(&self.src[start..end])
-            }
-
-            // Unexpected end of block comment
-            '*' if afterwards == Some('/') => {
-                self.advance();
-                Token::StarSlash
-            }
-
-            // Whitespace
-            ' ' | '\t' => {
-                while let Some(c) = self.chars.peekc() {
-                    match c {
-                        ' ' | '\t' => self.advance(),
-                        _ => break,
-                    }
-                }
-
-                Token::Space
-            }
-
-            // Newlines
-            '\r' if afterwards == Some('\n') => {
-                self.advance();
-                eol = Some(pos + "\r\n".len());
-                Token::Newline
-            }
-            c if is_newline_char(c) => {
-                eol = Some(pos + c.len_utf8());
-                Token::Newline
-            }
-
-            // Star/Underscore/Backtick in bodies
-            '*' if self.state == TS::Body => Token::Star,
-            '_' if self.state == TS::Body => Token::Underscore,
-            '`' if self.state == TS::Body => Token::Backtick,
-
-            // Context sensitive operators in headers
-            ':' if self.state == TS::Function => Token::Colon,
-            '=' if self.state == TS::Function => Token::Equals,
-            ',' if self.state == TS::Function => Token::Comma,
-
-            // A string value.
-            '"' if self.state == TS::Function => {
-                let start = self.string_index();
-                let mut end = start;
-                let mut escaped = false;
-
-                while let Some((index, c)) = self.chars.next() {
-                    end = index;
-                    if c == '"' && !escaped {
-                        break;
-                    }
-
-                    escaped = c == '\\';
-                }
-
-                Token::Quoted(&self.src[start..end])
-            }
-
-            // Escaping
-            '\\' => {
-                if let Some((index, c)) = self.chars.peek() {
-                    let escapable = match c {
-                        '[' | ']' | '\\' | '*' | '_' | '`' | ':' | '=' | ',' | '/' => true,
+            // Expressions or just strings.
+            c => {
+                let word = self.read_string_until(|n| {
+                    match n {
+                        c if c.is_whitespace() => true,
+                        '\\' | '[' | ']' | '*' | '_' | '`' | ':' | '=' |
+                        ',' | '"' | '/' => true,
                         _ => false,
-                    };
-
-                    if escapable {
-                        self.advance();
-                        Token::Text(&self.src[index..index + c.len_utf8()])
-                    } else {
-                        Token::Text("\\")
                     }
+                }, false, -(c.len_utf8() as isize), 0);
+
+                if self.state == Header {
+                    self.parse_expr(word)
                 } else {
-                    Token::Text("\\")
+                    Text(word)
                 }
             }
-
-            // Normal text
-            _ => {
-                // Find out when the word ends.
-                while let Some((_, c)) = self.chars.peek() {
-                    let second = self.chars.peekn(1).map(|p| p.1);
-
-                    // Whether the next token is still from the text or not.
-                    let continues = match c {
-                        '[' | ']' | '\\' => false,
-                        '*' | '_' | '`' if self.state == TS::Body => false,
-                        ':' | '=' | ',' | '"' if self.state == TS::Function => false,
-
-                        '/' => second != Some('/') && second != Some('*'),
-                        '*' => second != Some('/'),
-
-                        ' ' | '\t' => false,
-                        c if is_newline_char(c) => false,
-
-                        _ => true,
-                    };
-
-                    if !continues {
-                        break;
-                    }
-
-                    self.advance();
-                }
-
-                let end = self.string_index();
-                Token::Text(&self.src[pos..end])
-            }
         };
 
-        let start = self.line_position(pos);
-        let end = self.get_position();
-        let span = Span::new(start, end);
+        let end = self.chars.position();
+        let span = Span { start, end };
 
-        if let Some(index) = eol {
-            self.line += 1;
-            self.line_start_index = index;
-        }
-
-        Some(Spanned::new(token, span))
+        Some(Spanned { v: token, span })
     }
 }
 
-/// Whether this character is a newline (or starts one).
-pub(crate) fn is_newline_char(character: char) -> bool {
+impl<'s> Tokens<'s> {
+    fn parse_line_comment(&mut self) -> Token<'s> {
+        LineComment(self.read_string_until(is_newline_char, false, 1, 0))
+    }
+
+    /// Parses a (possibly nested) block comment; the `/` is already consumed.
+    fn parse_block_comment(&mut self) -> Token<'s> {
+        enum Last { Slash, Star, Other }
+        use Last::*;
+
+        self.eat(); // Skip the `*` of the opening `/*`.
+        let mut depth = 0;
+        let mut last = Last::Other;
+
+        // Find the first `*/` that does not correspond to a nested `/*`.
+        // Remove the last two bytes to obtain the raw inner text without `*/`.
+        BlockComment(self.read_string_until(|n| {
+            match n {
+                '/' => match last {
+                    Star if depth == 0 => return true,
+                    // A delimiter uses up both of its characters: reset `last`.
+                    Star => { depth -= 1; last = Other }
+                    _ => last = Slash,
+                }
+                '*' => match last {
+                    Slash => { depth += 1; last = Other }
+                    _ => last = Star,
+                }
+                _ => last = Other,
+            }
+            false
+        }, true, 0, -2))
+    }
+
+    fn parse_whitespace(&mut self, c: char) -> Token<'s> {
+        let mut newlines = if is_newline_char(c) { 1 } else { 0 };
+        let mut last = c;
+
+        self.read_string_until(|n| {
+            if is_newline_char(n) && !(last == '\r' && n == '\n') {
+                newlines += 1;
+            }
+
+            last = n;
+            !n.is_whitespace()
+        }, false, 0, 0);
+
+        Whitespace(newlines)
+    }
+
+    fn parse_string(&mut self) -> Token<'s> {
+        let mut escaped = false;
+        Expr(Expression::Str(self.read_string_until(|n| {
+            if n == '"' && !escaped {
+                return true;
+            } else if n == '\\' {
+                escaped = !escaped;
+            } else {
+                escaped = false;
+            }
+
+            false
+        }, true, 0, -1).to_string()))
+    }
+
+    fn parse_escaped(&mut self) -> Token<'s> {
+        fn is_escapable(c: char) -> bool {
+            match c {
+                '\\' | '[' | ']' | '*' | '_' | '`' | '/' => true,
+                _ => false,
+            }
+        }
+
+        let c = self.chars.peek().unwrap_or('n');
+        if self.state == Body && is_escapable(c) {
+            let index = self.chars.index();
+            self.eat();
+            Text(&self.src[index .. index + c.len_utf8()])
+        } else {
+            Text("\\")
+        }
+    }
+
+    fn parse_expr(&mut self, word: &'s str) -> Token<'s> {
+        if let Ok(b) = word.parse::<bool>() {
+            Expr(Expression::Bool(b))
+        } else if let Ok(num) = word.parse::<f64>() {
+            Expr(Expression::Num(num))
+        } else if let Ok(num) = parse_percentage(word) {
+            Expr(Expression::Num(num / 100.0))
+        } else if let Ok(size) = word.parse::<Size>() {
+            Expr(Expression::Size(size))
+        } else if let Some(ident) = Ident::new(word) {
+            Expr(Expression::Ident(ident))
+        } else {
+            Text(word)
+        }
+    }
+
+    fn read_string_until<F>(
+        &mut self,
+        mut f: F,
+        eat_match: bool,
+        offset_start: isize,
+        offset_end: isize,
+    ) -> &'s str where F: FnMut(char) -> bool {
+        let start = ((self.chars.index() as isize) + offset_start) as usize;
+        let mut matched = false;
+
+        while let Some(c) = self.chars.peek() {
+            if f(c) {
+                matched = true;
+                if eat_match {
+                    self.chars.next();
+                }
+                break;
+            }
+
+            self.chars.next();
+        }
+
+        let mut end = self.chars.index();
+        if matched {
+            end = ((end as isize) + offset_end) as usize;
+        }
+
+        &self.src[start .. end]
+    }
+
+    fn set_state(&mut self, state: State) {
+        self.stack.push(self.state);
+        self.state = state;
+    }
+
+    fn pop_state(&mut self) {
+        self.state = self.stack.pop().unwrap_or(Body);
+    }
+
+    fn eat(&mut self) {
+        self.chars.next();
+    }
+}
+
+fn parse_percentage(word: &str) -> Result<f64, ()> {
+    if word.ends_with('%') {
+        word[.. word.len() - 1].parse::<f64>().map_err(|_| ())
+    } else {
+        Err(())
+    }
+}
+
+/// Whether this character denotes a newline.
+fn is_newline_char(character: char) -> bool {
     match character {
-        '\n' | '\r' | '\u{000c}' | '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\x0A' ..= '\x0D' => true,
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}' => true,
         _ => false,
     }
 }
 
-/// A (index, char) iterator with double lookahead.
-#[derive(Debug, Clone)]
-struct PeekableChars<'s> {
-    string: &'s str,
-    chars: CharIndices<'s>,
-    peeked: SmallVec<[Option<(usize, char)>; 2]>,
-    base: usize,
+struct Characters<'s> {
+    iter: Peekable<Chars<'s>>,
+    position: Position,
     index: usize,
 }
 
-impl<'s> PeekableChars<'s> {
-    /// Create a new iterator from a string.
-    fn new(string: &'s str) -> PeekableChars<'s> {
-        PeekableChars {
-            string,
-            chars: string.char_indices(),
-            peeked: SmallVec::new(),
-            base: 0,
+impl<'s> Characters<'s> {
+    fn new(src: &'s str) -> Characters<'s> {
+        Characters {
+            iter: src.chars().peekable(),
+            position: Position::new(0, 0),
             index: 0,
         }
     }
 
-    /// Peek at the next element.
-    fn peek(&mut self) -> Option<(usize, char)> {
-        self.peekn(0)
-    }
+    fn next(&mut self) -> Option<char> {
+        let c = self.iter.next()?;
+        let len = c.len_utf8();
 
-    /// Peek at the char of the next element.
-    fn peekc(&mut self) -> Option<char> {
-        self.peekn(0).map(|p| p.1)
-    }
+        self.index += len;
 
-    /// Peek at the element after the next element.
-    fn peekn(&mut self, n: usize) -> Option<(usize, char)> {
-        while self.peeked.len() <= n {
-            let next = self.next_inner();
-            self.peeked.push(next);
+        if is_newline_char(c) && !(c == '\r' && self.peek() == Some('\n')) {
+            self.position.line += 1;
+            self.position.column = 0;
+        } else {
+            self.position.column += len;
         }
 
-        self.peeked[n]
+        Some(c)
     }
 
-    /// Return the next value of the inner iterator mapped with the offset.
-    fn next_inner(&mut self) -> Option<(usize, char)> {
-        self.chars.next().map(|(i, c)| (self.base + i, c))
+    fn peek(&mut self) -> Option<char> {
+        self.iter.peek().copied()
     }
 
-    fn string_index(&self) -> usize {
+    fn index(&self) -> usize {
         self.index
     }
 
-    fn set_string_index(&mut self, index: usize) {
-        self.chars = self.string[index..].char_indices();
-        self.base = index;
-        self.index = 0;
-        self.peeked.clear();
-    }
-}
-
-impl Iterator for PeekableChars<'_> {
-    type Item = (usize, char);
-
-    fn next(&mut self) -> Option<(usize, char)> {
-        let next = if !self.peeked.is_empty() {
-            self.peeked.remove(0)
-        } else {
-            self.next_inner()
-        };
-
-        if let Some((index, c)) = next {
-            self.index = index + c.len_utf8();
-        }
-
-        next
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use Token::{
-        Backtick as TB, BlockComment as BC, Colon as C, Equals as E, LeftBracket as L,
-        LineComment as LC, Newline as N, Quoted as Q, RightBracket as R, Space as S, Star as TS,
-        StarSlash as SS, Text as T, Underscore as TU,
-    };
-
-    /// Test if the source code tokenizes to the tokens.
-    fn test(src: &str, tokens: Vec<Token>) {
-        assert_eq!(Tokens::new(src)
-            .map(|token| token.v)
-            .collect::<Vec<_>>(), tokens);
-    }
-
-    /// Test if the tokens of the source code have the correct spans.
-    fn test_span(src: &str, spans: Vec<(usize, usize, usize, usize)>) {
-        assert_eq!(Tokens::new(src)
-            .map(|token| {
-                let Span { start, end } = token.span;
-                (start.line, start.column, end.line, end.column)
-            })
-            .collect::<Vec<_>>(), spans);
-    }
-
-    /// Tokenizes the basic building blocks.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_base() {
-        test("", vec![]);
-        test("Hallo", vec![T("Hallo")]);
-        test("[", vec![L]);
-        test("]", vec![R]);
-        test("*", vec![TS]);
-        test("_", vec![TU]);
-        test("`", vec![TB]);
-        test("\n", vec![N]);
-    }
-
-    /// This test looks if LF- and CRLF-style newlines get both identified correctly.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_whitespace_newlines() {
-        test(" \t", vec![S]);
-        test("First line\r\nSecond line\nThird line\n", vec![
-            T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
-            T("Third"), S, T("line"), N
-        ]);
-        test("Hello \n ", vec![T("Hello"), S, N, S]);
-        test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
-    }
-
-    /// Tests if escaping with backslash works as it should.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_escape() {
-        test(r"\[", vec![T("[")]);
-        test(r"\]", vec![T("]")]);
-        test(r"\**", vec![T("*"), TS]);
-        test(r"\*", vec![T("*")]);
-        test(r"\__", vec![T("_"), TU]);
-        test(r"\_", vec![T("_")]);
-        test(r"\hello", vec![T("\\"), T("hello")]);
-    }
-
-    /// Tests if escaped strings work.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_quoted() {
-        test(r#"[align: "hello\"world"]"#, vec![L, T("align"), C, S, Q(r#"hello\"world"#), R]);
-    }
-
-    /// Tokenizes some more realistic examples.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_examples() {
-        test(r"
-            [function][
-                Test [italic][example]!
-            ]
-        ", vec![
-            N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
-            T("example"), R, T("!"), N, S, R, N, S
-        ]);
-
-        test(r"
-            [page: size=A4]
-            [font: size=12pt]
-
-            Das ist ein Beispielsatz mit *fetter* Schrift.
-        ", vec![
-            N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
-            L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
-            T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
-            TS, T("fetter"), TS, S, T("Schrift."), N, S
-        ]);
-    }
-
-    /// This test checks whether the colon and equals symbols get parsed correctly depending on the
-    /// context: Either in a function header or in a body.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_symbols_context() {
-        test("[func: key=value][Answer: 7]", vec![
-            L, T("func"), C, S, T("key"), E, T("value"), R, L,
-            T("Answer:"), S, T("7"), R
-        ]);
-        test("[[n: k=v]:x][:[=]]:=", vec![
-            L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
-            L, T(":"), L, E, R, R, T(":=")
-        ]);
-        test("[hi: k=[func][body] v=1][hello]", vec![
-            L, T("hi"), C, S, T("k"), E, L, T("func"), R, L, T("body"), R, S,
-            T("v"), E, T("1"), R, L, T("hello"), R
-        ]);
-        test("[func: __key__=value]", vec![L, T("func"), C, S, T("__key__"), E, T("value"), R]);
-        test("The /*[*/ answer: 7.", vec![T("The"), S, BC("["), S, T("answer:"), S, T("7.")]);
-    }
-
-    /// Test if block and line comments get tokenized as expected.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_comments() {
-        test("These // Line comments.", vec![T("These"), S, LC(" Line comments.")]);
-        test("This /* is */ a comment.", vec![T("This"), S, BC(" is "), S, T("a"), S, T("comment.")]);
-        test("[Head/*of*/][Body]", vec![L, T("Head"), BC("of"), R, L, T("Body"), R]);
-        test("/* Hey */ */", vec![BC(" Hey "), S, SS]);
-        test("Hey\n// Yoo /*\n*/", vec![T("Hey"), N, LC(" Yoo /*"), N, SS]);
-        test("/* My /* line // */ comment */", vec![BC(" My /* line // */ comment ")])
-    }
-
-    /// This test has a special look at the underscore syntax.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_underscores() {
-        test("he_llo_world_ __ Now this_ is_ special!",
-             vec![T("he"), TU, T("llo"), TU, T("world"), TU, S, TU, TU, S, T("Now"), S,
-                  T("this"), TU, S, T("is"), TU, S, T("special!")]);
-    }
-
-    /// This test is for checking if non-ASCII characters get parsed correctly.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_unicode() {
-        test("[document][Hello 🌍!]", vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
-        test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
-    }
-
-    /// This test checks if all tokens have the correct spans.
-    #[test]
-    #[rustfmt::skip]
-    fn tokenize_spans() {
-        test_span("Hello World", vec![(1, 0, 1, 5), (1, 5, 1, 6), (1, 6, 1, 11)]);
-        test_span("🌍_🎈", vec![(1, 0, 1, 4), (1, 4, 1, 5), (1, 5, 1, 9)]);
-        test_span("hello\nworld", vec![(1, 0, 1, 5), (1, 5, 1, 6), (2, 0, 2, 5)]);
-        test_span("[hello: world]", vec![
-            (1, 0, 1, 1), (1, 1, 1, 6), (1, 6, 1, 7),
-            (1, 7, 1, 8), (1, 8, 1, 13), (1, 13, 1, 14)
-        ]);
+    fn position(&self) -> Position {
+        self.position
     }
 }
diff --git a/tests/parse.rs b/tests/parse.rs
index 953cc959f..e00b05d83 100644
--- a/tests/parse.rs
+++ b/tests/parse.rs
@@ -1,9 +1,26 @@
+#![allow(unused_imports)]
+#![allow(non_snake_case)]
+
+use typstc::size::Size;
 use typstc::syntax::*;
 use Token::{
-    Space as S, Newline as N, LeftBracket as LB,
-    RightBracket as RB, Text as T, *
+    Whitespace as W,
+    LineComment as LC, BlockComment as BC, StarSlash as SS,
+    LeftBracket as LB, RightBracket as RB,
+    LeftParen as LP, RightParen as RP,
+    LeftBrace as LBR, RightBrace as RBR,
+    Colon as CL, Comma as CM, Equals as EQ, Expr as E,
+    Star as ST, Underscore as U, Backtick as B, Text as T,
 };
 
+use Expression as Expr;
+// Shorthands for expression tokens; each owns its payload, so nothing borrows from the input.
+fn ID(ident: &str) -> Token<'static> { E(Expr::Ident(Ident::new(ident.to_string()).unwrap())) }
+fn STR(ident: &str) -> Token<'static> { E(Expr::Str(ident.to_string())) }
+fn SIZE(size: Size) -> Token<'static> { E(Expr::Size(size)) }
+fn NUM(num: f64) -> Token<'static> { E(Expr::Num(num)) }
+fn BOOL(b: bool) -> Token<'static> { E(Expr::Bool(b)) }
+
 /// Parses the test syntax.
 macro_rules! tokens {
     ($($src:expr =>($line:expr)=> $tokens:expr)*) => ({
diff --git a/tests/parsing/base.rs b/tests/parsing/base.rs
deleted file mode 100644
index ad7d87c02..000000000
--- a/tests/parsing/base.rs
+++ /dev/null
@@ -1,78 +0,0 @@
-// Spaces, Newlines, Brackets.
-""                => []
-" "               => [S]
-"    "            => [S]
-"\t"              => [S]
-"  \t"            => [S]
-"\n"              => [N]
-"\n "             => [N, S]
-"  \n"            => [S, N]
-"  \n   "         => [S, N, S]
-"["               => [LB]
-"]"               => [RB]
-
-// Header only tokens.
-"[:]"             => [LB, Colon, RB]
-"[=]"             => [LB, Equals, RB]
-"[,]"             => [LB, Comma, RB]
-":"               => [T(":")]
-"="               => [T("=")]
-","               => [T(",")]
-r#"["hi"]"#       => [LB, Quoted("hi"), RB]
-r#""hi""#         => [T(r#""hi""#)]
-
-// Body only tokens.
-"_"               => [Underscore]
-"*"               => [Star]
-"`"               => [Backtick]
-"[_]"             => [LB, T("_"), RB]
-"[*]"             => [LB, T("*"), RB]
-"[`]"             => [LB, T("`"), RB]
-
-// Comments.
-"//line"          => [LineComment("line")]
-"/*block*/"       => [BlockComment("block")]
-"*/"              => [StarSlash]
-
-// Plain text.
-"A"               => [T("A")]
-"Hello"           => [T("Hello")]
-"Hello-World"     => [T("Hello-World")]
-r#"A"B"#          => [T(r#"A"B"#)]
-"🌍"              => [T("🌍")]
-
-// Escapes.
-r"\["             => [T("[")]
-r"\]"             => [T("]")]
-r"\\"             => [T(r"\")]
-r"[\[]"           => [LB, T("["), RB]
-r"[\]]"           => [LB, T("]"), RB]
-r"[\\]"           => [LB, T(r"\"), RB]
-r"\:"             => [T(":")]
-r"\="             => [T("=")]
-r"\/"             => [T("/")]
-r"[\:]"           => [LB, T(":"), RB]
-r"[\=]"           => [LB, T("="), RB]
-r"[\,]"           => [LB, T(","), RB]
-r"\*"             => [T("*")]
-r"\_"             => [T("_")]
-r"\`"             => [T("`")]
-r"[\*]"           => [LB, T("*"), RB]
-r"[\_]"           => [LB, T("_"), RB]
-r"[\`]"           => [LB, T("`"), RB]
-
-// Whitespace.
-"Hello World"     => [T("Hello"), S, T("World")]
-"Hello  World"    => [T("Hello"), S, T("World")]
-"Hello \t World"  => [T("Hello"), S, T("World")]
-
-// Newline.
-"First\n"         => [T("First"), N]
-"First \n"        => [T("First"), S, N]
-"First\n "        => [T("First"), N, S]
-"First \n "       => [T("First"), S, N, S]
-"First\nSecond"   => [T("First"), N, T("Second")]
-"First\r\nSecond" => [T("First"), N, T("Second")]
-"First \nSecond"  => [T("First"), S, N, T("Second")]
-"First\n Second"  => [T("First"), N, S, T("Second")]
-"First \n Second" => [T("First"), S, N, S, T("Second")]
diff --git a/tests/parsing/tokens.rs b/tests/parsing/tokens.rs
new file mode 100644
index 000000000..4f5474bb2
--- /dev/null
+++ b/tests/parsing/tokens.rs
@@ -0,0 +1,62 @@
+// Whitespace.
+""             => []
+" "            => [W(0)]
+"    "         => [W(0)]
+"\t"           => [W(0)]
+"  \t"         => [W(0)]
+"\n"           => [W(1)]
+"\n "          => [W(1)]
+"  \n"         => [W(1)]
+"  \n   "      => [W(1)]
+"  \n\t \n  "  => [W(2)]
+"\r\n"         => [W(1)]
+" \r\r\n \x0D" => [W(3)]
+"\n\r"         => [W(2)]
+
+// Comments.
+"a // bc\n "        => [T("a"), W(0), LC(" bc"), W(1)]
+"a //a//b\n "       => [T("a"), W(0), LC("a//b"), W(1)]
+"a //a//b\r\n"      => [T("a"), W(0), LC("a//b"), W(1)]
+"a //a//b\n\nhello" => [T("a"), W(0), LC("a//b"), W(2), T("hello")]
+"/**/"              => [BC("")]
+"_/*_/*a*/*/"       => [U, BC("_/*a*/")]
+"/*/*/"             => [BC("/*/")]
+"abc*/"             => [T("abc"), SS]
+
+// Header only tokens.
+"["                   => [LB]
+"]"                   => [RB]
+"[(){}:=,]"           => [LB, LP, RP, LBR, RBR, CL, EQ, CM, RB]
+"[a:b]"               => [LB, ID("a"), CL, ID("b"), RB]
+"[🌓, 🌍,]"          => [LB, T("🌓"), CM, W(0), T("🌍"), CM, RB]
+"[=]"                 => [LB, EQ, RB]
+"[,]"                 => [LB, CM, RB]
+"a: b"                => [T("a"), T(":"), W(0), T("b")]
+"c=d, "               => [T("c"), T("=d"), T(","), W(0)]
+r#"["hello\"world"]"# => [LB, STR(r#"hello\"world"#), RB]
+r#"["hi", 12pt]"#     => [LB, STR("hi"), CM, W(0), SIZE(Size::pt(12.0)), RB]
+"\"hi\""              => [T("\"hi"), T("\"")]
+"[a: true, x=1]"      => [LB, ID("a"), CL, W(0), BOOL(true), CM, W(0),
+                          ID("x"), EQ, NUM(1.0), RB]
+"[120%]"              => [LB, NUM(1.2), RB]
+
+// Body only tokens.
+"_*`"           => [U, ST, B]
+"[_*`]"         => [LB, T("_"), T("*"), T("`"), RB]
+"hi_you_ there" => [T("hi"), U, T("you"), U, W(0), T("there")]
+
+// Escapes.
+r"\["   => [T("[")]
+r"\]"   => [T("]")]
+r"\\"   => [T(r"\")]
+r"\/"   => [T("/")]
+r"\*"   => [T("*")]
+r"\_"   => [T("_")]
+r"\`"   => [T("`")]
+
+// Special symbols that cannot be escaped: the backslash stays a literal text token.
+r"\:"   => [T(r"\"), T(":")]
+r"\="   => [T(r"\"), T("=")]
+r"[\:]" => [LB, T(r"\"), CL, RB]
+r"[\=]" => [LB, T(r"\"), EQ, RB]
+r"[\,]" => [LB, T(r"\"), CM, RB]