Remove the concept of words from tokenization 🎈
parent 90848df5de
commit 9d605c3128
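In short: the tokenizer no longer walks Unicode word boundaries (unicode-segmentation) but plain char_indices, slicing maximal text runs straight out of the source. A before/after sketch of the observable change, with the expected values taken from the updated tests below (it assumes the `Tokens` iterator and `Token` enum from this diff are in scope and that `Token` supports `Debug`/`PartialEq`, as the tests imply; exact module paths may differ):

    // Sketch only, not part of the commit.
    use crate::parsing::Tokens; // struct Tokens from src/parsing.rs below
    use crate::syntax::Token;   // enum Token, with Word renamed to Text below

    fn demo() {
        let tokens: Vec<Token> = Tokens::new("Hello 🌍!").collect();
        // before: [Word("Hello"), Space, Word("🌍"), Word("!")]
        // after:  [Text("Hello"), Space, Text("🌍!")]
        assert_eq!(tokens, vec![
            Token::Text("Hello"),
            Token::Space,
            Token::Text("🌍!"),
        ]);
    }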
@@ -7,7 +7,6 @@ edition = "2018"
 [dependencies]
 pdf = { path = "../pdf" }
 opentype = { path = "../opentype" }
-unicode-segmentation = "1.2"
 unicode-xid = "0.1.0"
 byteorder = "1"
 smallvec = "0.6.9"
@@ -62,7 +62,7 @@ impl<'a> Engine<'a> {
         // Iterate through the documents nodes.
         for node in &self.tree.nodes {
             match node {
-                Node::Word(word) => self.write_word(word)?,
+                Node::Text(text) => self.write_word(text)?,
                 Node::Space => self.write_space()?,
                 Node::Newline => {
                     self.write_buffered_text();
@@ -53,7 +53,6 @@ use crate::syntax::SyntaxTree;
 
 #[macro_use]
 mod error;
-mod utility;
 pub mod doc;
 pub mod engine;
 pub mod export;
src/parsing.rs (294 changed lines)

@@ -1,30 +1,29 @@
 //! Tokenization and parsing of source code into syntax trees.
 
 use std::collections::HashMap;
-use std::fmt;
 use std::iter::Peekable;
 use std::mem::swap;
 use std::ops::Deref;
+use std::str::CharIndices;
 
-use unicode_segmentation::{UnicodeSegmentation, UWordBounds};
+use unicode_xid::UnicodeXID;
 
 use crate::syntax::*;
 use crate::func::{ParseContext, Scope};
-use crate::utility::{Splinor, Spline, Splined, StrExt};
 
 
 /// An iterator over the tokens of source code.
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 pub struct Tokens<'s> {
     source: &'s str,
-    words: Peekable<UWordBounds<'s>>,
-    state: TokensState<'s>,
-    stack: Vec<TokensState<'s>>,
+    chars: Peekable<CharIndices<'s>>,
+    state: TokensState,
+    stack: Vec<TokensState>,
 }
 
 /// The state the tokenizer is in.
-#[derive(Debug, Clone)]
-enum TokensState<'s> {
+#[derive(Debug, Clone, PartialEq)]
+enum TokensState {
     /// The base state if there is nothing special we are in.
     Body,
     /// Inside a function header. Here colons and equal signs get parsed
@@ -32,9 +31,6 @@ enum TokensState<'s> {
     Function,
     /// We expect either the end of the function or the beginning of the body.
     MaybeBody,
-    /// We are inside one unicode word that consists of multiple tokens,
-    /// because it contains double underscores.
-    DoubleUnderscore(Spline<'s, Token<'s>>),
 }
 
 impl<'s> Tokens<'s> {
@@ -43,7 +39,7 @@ impl<'s> Tokens<'s> {
     pub fn new(source: &'s str) -> Tokens<'s> {
         Tokens {
             source,
-            words: source.split_word_bounds().peekable(),
+            chars: source.char_indices().peekable(),
             state: TokensState::Body,
             stack: vec![],
         }
@@ -51,11 +47,11 @@ impl<'s> Tokens<'s> {
 
     /// Advance the iterator by one step.
     fn advance(&mut self) {
-        self.words.next();
+        self.chars.next();
     }
 
     /// Switch to the given state.
-    fn switch(&mut self, mut state: TokensState<'s>) {
+    fn switch(&mut self, mut state: TokensState) {
         swap(&mut state, &mut self.state);
         self.stack.push(state);
     }
@@ -70,6 +66,11 @@ impl<'s> Tokens<'s> {
         self.advance();
         token
     }
+
+    /// Returns a word containing the string bounded by the given indices.
+    fn text(&self, start: usize, end: usize) -> Token<'s> {
+        Token::Text(&self.source[start .. end])
+    }
 }
 
 impl<'s> Iterator for Tokens<'s> {
@@ -79,27 +80,11 @@ impl<'s> Iterator for Tokens<'s> {
     fn next(&mut self) -> Option<Token<'s>> {
         use TokensState as TS;
 
-        // Return the remaining words and double underscores.
-        if let TS::DoubleUnderscore(splinor) = &mut self.state {
-            loop {
-                if let Some(splined) = splinor.next() {
-                    return Some(match splined {
-                        Splined::Value(word) if word != "" => Token::Word(word),
-                        Splined::Splinor(s) => s,
-                        _ => continue,
-                    });
-                } else {
-                    self.unswitch();
-                    break;
-                }
-            }
-        }
-
-        // Skip whitespace, but if at least one whitespace word existed,
-        // remember that, because we return a space token.
+        // Skip whitespace, but if at least one whitespace character existed,
+        // remember that, because then we return a space token.
         let mut whitespace = false;
-        while let Some(word) = self.words.peek() {
-            if !word.is_whitespace() {
+        while let Some(&(_, c)) = self.chars.peek() {
+            if !c.is_whitespace() || c == '\n' || c == '\r' {
                 break;
             }
             whitespace = true;
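A side effect of switching to characters: '\n' and '\r' are excluded from the whitespace skip above, so newlines survive to be reported as Newline tokens while runs of other whitespace collapse into a single Space. A standalone sketch of that loop (the function name is illustrative, not from the commit):

    use std::iter::Peekable;
    use std::str::CharIndices;

    /// Consume whitespace except '\n' and '\r'; report whether any was seen.
    fn skip_space(chars: &mut Peekable<CharIndices<'_>>) -> bool {
        let mut seen = false;
        while let Some(&(_, c)) = chars.peek() {
            if !c.is_whitespace() || c == '\n' || c == '\r' {
                break;
            }
            seen = true;
            chars.next();
        }
        seen
    }

    fn main() {
        let mut chars = " \trest\n".char_indices().peekable();
        assert!(skip_space(&mut chars));
        // The loop stopped at 'r'; the trailing '\n' is left for the
        // Newline arm of the tokenizer.
        assert_eq!(chars.peek().map(|&(_, c)| c), Some('r'));
    }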
@@ -111,100 +96,82 @@ impl<'s> Iterator for Tokens<'s> {
 
         // Function maybe has a body
         if self.state == TS::MaybeBody {
-            match *self.words.peek()? {
-                "[" => {
-                    self.state = TS::Body;
-                    return Some(self.consumed(Token::LeftBracket));
-                },
-                _ => self.unswitch(),
+            if self.chars.peek()?.1 == '[' {
+                self.state = TS::Body;
+                return Some(self.consumed(Token::LeftBracket));
+            } else {
+                self.unswitch();
             }
         }
 
         // Now all special cases are handled and we can finally look at the
         // next words.
-        let next = self.words.next()?;
-        let afterwards = self.words.peek();
+        let (next_pos, next) = self.chars.next()?;
+        let afterwards = self.chars.peek().map(|&(_, c)| c);
 
         Some(match next {
             // Special characters
-            "[" => {
+            '[' => {
                 self.switch(TS::Function);
                 Token::LeftBracket
             },
-            "]" => {
+            ']' => {
                 if self.state == TS::Function {
                     self.state = TS::MaybeBody;
                 }
                 Token::RightBracket
             },
-            "$" => Token::Dollar,
-            "#" => Token::Hashtag,
+            '$' => Token::Dollar,
+            '#' => Token::Hashtag,
 
             // Context sensitive operators
-            ":" if self.state == TS::Function => Token::Colon,
-            "=" if self.state == TS::Function => Token::Equals,
+            ':' if self.state == TS::Function => Token::Colon,
+            '=' if self.state == TS::Function => Token::Equals,
 
             // Double star/underscore
-            "*" if afterwards == Some(&"*") => self.consumed(Token::DoubleStar),
-            "__" => Token::DoubleUnderscore,
+            '*' if afterwards == Some('*') => self.consumed(Token::DoubleStar),
+            '_' if afterwards == Some('_') => self.consumed(Token::DoubleUnderscore),
 
             // Newlines
-            "\n" | "\r\n" => Token::Newline,
+            '\n' => Token::Newline,
+            '\r' if afterwards == Some('\n') => self.consumed(Token::Newline),
 
             // Escaping
-            r"\" => {
-                if let Some(next) = afterwards {
-                    let escapable = match *next {
-                        "[" | "]" | "$" | "#" | r"\" | ":" | "=" | "*" | "_" => true,
-                        w if w.starts_with("__") => true,
-                        _ => false,
-                    };
-
-                    if escapable {
-                        let next = *next;
+            '\\' => {
+                if let Some(&(index, c)) = self.chars.peek() {
+                    if is_special_character(c) {
                         self.advance();
-                        return Some(Token::Word(next));
+                        return Some(self.text(index, index + c.len_utf8()));
                     }
                 }
 
-                Token::Word(r"\")
-            },
-
-            // Double underscores hidden in words.
-            word if word.contains("__") => {
-                let spline = word.spline("__", Token::DoubleUnderscore);
-                self.switch(TS::DoubleUnderscore(spline));
-                return self.next();
+                Token::Text("\\")
             },
 
             // Now it seems like it's just a normal word.
-            word => Token::Word(word),
+            _ => {
+                // Find out when the word ends.
+                let mut end = (next_pos, next);
+                while let Some(&(index, c)) = self.chars.peek() {
+                    if is_special_character(c) || c.is_whitespace() {
+                        break;
+                    }
+                    end = (index, c);
+                    self.advance();
+                }
+
+                let end_pos = end.0 + end.1.len_utf8();
+                self.text(next_pos, end_pos)
+            },
         })
     }
 }
 
-impl fmt::Debug for Tokens<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_struct("Tokens")
-            .field("source", &self.source)
-            .field("words", &"Peekable<UWordBounds>")
-            .field("state", &self.state)
-            .field("stack", &self.stack)
-            .finish()
-    }
-}
-
-impl PartialEq for TokensState<'_> {
-    fn eq(&self, other: &TokensState) -> bool {
-        use TokensState as TS;
-
-        match (self, other) {
-            (TS::Body, TS::Body) => true,
-            (TS::Function, TS::Function) => true,
-            (TS::MaybeBody, TS::MaybeBody) => true,
-            // They are not necessarily different, but we don't care
-            _ => false,
-        }
-    }
-}
+/// Whether this character has a special meaning in the language.
+fn is_special_character(character: char) -> bool {
+    match character {
+        '[' | ']' | '$' | '#' | '\\' | ':' | '=' | '*' | '_' => true,
+        _ => false,
+    }
+}
 
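The catch-all `_` arm above is the heart of the change: a text token now extends to the next special character or whitespace, so "Schrift." stays one token while "he__llo" splits at the underscores, exactly as the updated tests expect. A minimal standalone sketch of the same scan written as free functions over char_indices (names are illustrative, not from the commit):

    /// Same character set as is_special_character above.
    fn is_special(c: char) -> bool {
        match c {
            '[' | ']' | '$' | '#' | '\\' | ':' | '=' | '*' | '_' => true,
            _ => false,
        }
    }

    /// Byte length of the text run at the start of `source`.
    fn text_run_len(source: &str) -> usize {
        source
            .char_indices()
            .find(|&(_, c)| is_special(c) || c.is_whitespace())
            .map(|(index, _)| index)
            .unwrap_or(source.len())
    }

    fn main() {
        assert_eq!(text_run_len("Schrift."), "Schrift.".len()); // one token
        assert_eq!(text_run_len("he__llo"), 2);                 // stops at '_'
        assert_eq!(text_run_len("🌍! end"), "🌍!".len());       // char-boundary safe
    }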
@@ -285,8 +252,8 @@ impl<'s, 't> Parser<'s, 't> {
             Token::Space => self.append_space_consumed(),
             Token::Newline => self.switch_consumed(PS::FirstNewline),
 
-            // Words
-            Token::Word(word) => self.append_consumed(Node::Word(word.to_owned())),
+            // Text
+            Token::Text(word) => self.append_consumed(Node::Text(word.to_owned())),
 
             // Functions
             Token::LeftBracket => self.parse_function()?,
@@ -315,7 +282,7 @@ impl<'s, 't> Parser<'s, 't> {
 
         // The next token should be the name of the function.
         let name = match self.tokens.next() {
-            Some(Token::Word(word)) => {
+            Some(Token::Text(word)) => {
                 if word.is_identifier() {
                     Ok(word.to_owned())
                 } else {
@@ -537,6 +504,39 @@ impl<'s> Iterator for ParseTokens<'s> {
     }
 }
 
+/// More useful functions on `str`'s.
+trait StrExt {
+    /// Whether self consists only of whitespace.
+    fn is_whitespace(&self) -> bool;
+
+    /// Whether this word is a valid unicode identifier.
+    fn is_identifier(&self) -> bool;
+}
+
+impl StrExt for str {
+    fn is_whitespace(&self) -> bool {
+        self.chars().all(|c| c.is_whitespace() && c != '\n')
+    }
+
+    fn is_identifier(&self) -> bool {
+        let mut chars = self.chars();
+
+        match chars.next() {
+            Some(c) if !UnicodeXID::is_xid_start(c) => return false,
+            None => return false,
+            _ => (),
+        }
+
+        while let Some(c) = chars.next() {
+            if !UnicodeXID::is_xid_continue(c) {
+                return false;
+            }
+        }
+
+        true
+    }
+}
+
 /// The error type for parsing.
 pub struct ParseError(String);
 
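With unicode-segmentation gone, identifier validation is the one place that still needs Unicode tables, now via unicode-xid: a name must start with an XID_Start character and continue with XID_Continue characters. A free-function sketch of the `is_identifier` method above:

    use unicode_xid::UnicodeXID;

    /// Free-function version of StrExt::is_identifier above.
    fn is_identifier(s: &str) -> bool {
        let mut chars = s.chars();
        match chars.next() {
            Some(c) if UnicodeXID::is_xid_start(c) => {}
            _ => return false,
        }
        chars.all(|c| UnicodeXID::is_xid_continue(c))
    }

    fn main() {
        assert!(is_identifier("func"));
        assert!(is_identifier("größe"));  // XID classes cover non-ASCII letters
        assert!(!is_identifier("1page")); // digits may continue but not start
        assert!(!is_identifier(""));
    }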
@@ -560,7 +560,7 @@ mod token_tests {
     use super::*;
     use Token::{Space as S, Newline as N, LeftBracket as L, RightBracket as R,
                 Colon as C, Equals as E, DoubleUnderscore as DU, DoubleStar as DS,
-                Dollar as D, Hashtag as H, Word as W};
+                Dollar as D, Hashtag as H, Text as T};
 
     /// Test if the source code tokenizes to the tokens.
     fn test(src: &str, tokens: Vec<Token>) {
@@ -571,7 +571,7 @@ mod token_tests {
     #[test]
     fn tokenize_base() {
         test("", vec![]);
-        test("Hallo", vec![W("Hallo")]);
+        test("Hallo", vec![T("Hallo")]);
         test("[", vec![L]);
         test("]", vec![R]);
         test("$", vec![D]);
@@ -586,26 +586,26 @@ mod token_tests {
     fn tokenize_whitespace_newlines() {
         test(" \t", vec![S]);
         test("First line\r\nSecond line\nThird line\n",
-             vec![W("First"), S, W("line"), N, W("Second"), S, W("line"), N,
-                  W("Third"), S, W("line"), N]);
-        test("Hello \n ", vec![W("Hello"), S, N, S]);
-        test("Dense\nTimes", vec![W("Dense"), N, W("Times")]);
+             vec![T("First"), S, T("line"), N, T("Second"), S, T("line"), N,
+                  T("Third"), S, T("line"), N]);
+        test("Hello \n ", vec![T("Hello"), S, N, S]);
+        test("Dense\nTimes", vec![T("Dense"), N, T("Times")]);
     }
 
     /// Tests if escaping with backslash works as it should.
     #[test]
     fn tokenize_escape() {
-        test(r"\[", vec![W("[")]);
-        test(r"\]", vec![W("]")]);
-        test(r"\#", vec![W("#")]);
-        test(r"\$", vec![W("$")]);
-        test(r"\:", vec![W(":")]);
-        test(r"\=", vec![W("=")]);
-        test(r"\**", vec![W("*"), W("*")]);
-        test(r"\*", vec![W("*")]);
-        test(r"\__", vec![W("__")]);
-        test(r"\_", vec![W("_")]);
-        test(r"\hello", vec![W(r"\"), W("hello")]);
+        test(r"\[", vec![T("[")]);
+        test(r"\]", vec![T("]")]);
+        test(r"\#", vec![T("#")]);
+        test(r"\$", vec![T("$")]);
+        test(r"\:", vec![T(":")]);
+        test(r"\=", vec![T("=")]);
+        test(r"\**", vec![T("*"), T("*")]);
+        test(r"\*", vec![T("*")]);
+        test(r"\__", vec![T("_"), T("_")]);
+        test(r"\_", vec![T("_")]);
+        test(r"\hello", vec![T("\\"), T("hello")]);
     }
 
     /// Tokenizes some more realistic examples.
@@ -616,8 +616,8 @@ mod token_tests {
             Test [italic][example]!
         ]
         ", vec![
-            N, S, L, W("function"), R, L, N, S, W("Test"), S, L, W("italic"), R, L,
-            W("example"), R, W("!"), N, S, R, N, S
+            N, S, L, T("function"), R, L, N, S, T("Test"), S, L, T("italic"), R, L,
+            T("example"), R, T("!"), N, S, R, N, S
         ]);
 
         test(r"
@@ -626,10 +626,10 @@ mod token_tests {
 
         Das ist ein Beispielsatz mit **fetter** Schrift.
        ", vec![
-            N, S, L, W("page"), C, S, W("size"), E, W("A4"), R, N, S,
-            L, W("font"), C, S, W("size"), E, W("12pt"), R, N, N, S,
-            W("Das"), S, W("ist"), S, W("ein"), S, W("Beispielsatz"), S, W("mit"), S,
-            DS, W("fetter"), DS, S, W("Schrift"), W("."), N, S
+            N, S, L, T("page"), C, S, T("size"), E, T("A4"), R, N, S,
+            L, T("font"), C, S, T("size"), E, T("12pt"), R, N, N, S,
+            T("Das"), S, T("ist"), S, T("ein"), S, T("Beispielsatz"), S, T("mit"), S,
+            DS, T("fetter"), DS, S, T("Schrift."), N, S
         ]);
     }
 
@@ -638,13 +638,13 @@ mod token_tests {
     #[test]
     fn tokenize_symbols_context() {
         test("[func: key=value][Answer: 7]",
-             vec![L, W("func"), C, S, W("key"), E, W("value"), R, L,
-                  W("Answer"), W(":"), S, W("7"), R]);
+             vec![L, T("func"), C, S, T("key"), E, T("value"), R, L,
+                  T("Answer"), T(":"), S, T("7"), R]);
         test("[[n: k=v]:x][:[=]]:=",
-             vec![L, L, W("n"), C, S, W("k"), E, W("v"), R, C, W("x"), R,
-                  L, W(":"), L, E, R, R, W(":"), W("=")]);
+             vec![L, L, T("n"), C, S, T("k"), E, T("v"), R, C, T("x"), R,
+                  L, T(":"), L, E, R, R, T(":"), T("=")]);
         test("[func: __key__=value]",
-             vec![L, W("func"), C, S, DU, W("key"), DU, E, W("value"), R]);
+             vec![L, T("func"), C, S, DU, T("key"), DU, E, T("value"), R]);
     }
 
     /// This test has a special look at the double underscore syntax, because
@@ -653,16 +653,16 @@ mod token_tests {
     #[test]
     fn tokenize_double_underscore() {
         test("he__llo__world_ _ __ Now this_ is__ special!",
-             vec![W("he"), DU, W("llo"), DU, W("world_"), S, W("_"), S, DU, S, W("Now"), S,
-                  W("this_"), S, W("is"), DU, S, W("special"), W("!")]);
+             vec![T("he"), DU, T("llo"), DU, T("world"), T("_"), S, T("_"), S, DU, S, T("Now"), S,
+                  T("this"), T("_"), S, T("is"), DU, S, T("special!")]);
     }
 
     /// This test is for checking if non-ASCII characters get parsed correctly.
     #[test]
     fn tokenize_unicode() {
         test("[document][Hello 🌍!]",
-             vec![L, W("document"), R, L, W("Hello"), S, W("🌍"), W("!"), R]);
-        test("[f]⺐.", vec![L, W("f"), R, W("⺐"), W(".")]);
+             vec![L, T("document"), R, L, T("Hello"), S, T("🌍!"), R]);
+        test("[f]⺐.", vec![L, T("f"), R, T("⺐.")]);
     }
 }
 
@@ -674,7 +674,7 @@ mod parse_tests {
     use Node::{Space as S, Newline as N, Func as F};
 
     #[allow(non_snake_case)]
-    fn W(s: &str) -> Node { Node::Word(s.to_owned()) }
+    fn T(s: &str) -> Node { Node::Text(s.to_owned()) }
 
     /// A testing function which just parses it's body into a syntax tree.
     #[derive(Debug, PartialEq)]
@@ -764,19 +764,19 @@ mod parse_tests {
     #[test]
     fn parse_base() {
         test("", tree! []);
-        test("Hello World!", tree! [ W("Hello"), S, W("World"), W("!") ]);
+        test("Hello World!", tree! [ T("Hello"), S, T("World!") ]);
     }
 
     /// Test whether newlines generate the correct whitespace.
     #[test]
     fn parse_newlines_whitespace() {
-        test("Hello\nWorld", tree! [ W("Hello"), S, W("World") ]);
-        test("Hello \n World", tree! [ W("Hello"), S, W("World") ]);
-        test("Hello\n\nWorld", tree! [ W("Hello"), N, W("World") ]);
-        test("Hello \n\nWorld", tree! [ W("Hello"), S, N, W("World") ]);
-        test("Hello\n\n World", tree! [ W("Hello"), N, S, W("World") ]);
-        test("Hello \n \n \n World", tree! [ W("Hello"), S, N, S, W("World") ]);
-        test("Hello\n \n\n World", tree! [ W("Hello"), S, N, S, W("World") ]);
+        test("Hello\nWorld", tree! [ T("Hello"), S, T("World") ]);
+        test("Hello \n World", tree! [ T("Hello"), S, T("World") ]);
+        test("Hello\n\nWorld", tree! [ T("Hello"), N, T("World") ]);
+        test("Hello \n\nWorld", tree! [ T("Hello"), S, N, T("World") ]);
+        test("Hello\n\n World", tree! [ T("Hello"), N, S, T("World") ]);
+        test("Hello \n \n \n World", tree! [ T("Hello"), S, N, S, T("World") ]);
+        test("Hello\n \n\n World", tree! [ T("Hello"), S, N, S, T("World") ]);
     }
 
     /// Parse things dealing with functions.
@@ -790,18 +790,18 @@ mod parse_tests {
 
         test_scoped(&scope,"[test]", tree! [ F(func! { name => "test", body => None }) ]);
         test_scoped(&scope, "This is an [modifier][example] of a function invocation.", tree! [
-            W("This"), S, W("is"), S, W("an"), S,
-            F(func! { name => "modifier", body => tree! [ W("example") ] }), S,
-            W("of"), S, W("a"), S, W("function"), S, W("invocation"), W(".")
+            T("This"), S, T("is"), S, T("an"), S,
+            F(func! { name => "modifier", body => tree! [ T("example") ] }), S,
+            T("of"), S, T("a"), S, T("function"), S, T("invocation.")
         ]);
         test_scoped(&scope, "[func][Hello][modifier][Here][end]", tree! [
             F(func! {
                 name => "func",
-                body => tree! [ W("Hello") ],
+                body => tree! [ T("Hello") ],
             }),
             F(func! {
                 name => "modifier",
-                body => tree! [ W("Here") ],
+                body => tree! [ T("Here") ],
            }),
            F(func! {
                name => "end",
@@ -820,11 +820,11 @@ mod parse_tests {
                 body => tree! [
                     F(func! {
                         name => "func",
-                        body => tree! [ W("call") ],
+                        body => tree! [ T("call") ],
                     }),
                 ],
             }),
-            S, W("outside")
+            S, T("outside")
         ]);
     }
 
@@ -839,12 +839,12 @@ mod parse_tests {
                 name => "func",
                 body => None,
             }),
-            S, W("⺐"), W(".")
+            S, T("⺐.")
         ]);
         test_scoped(&scope, "[bold][Hello 🌍!]", tree! [
             F(func! {
                 name => "bold",
-                body => tree! [ W("Hello"), S, W("🌍"), W("!") ],
+                body => tree! [ T("Hello"), S, T("🌍!") ],
             })
         ]);
    }
@@ -30,8 +30,8 @@ pub enum Token<'s> {
     Dollar,
     /// A hashtag starting a _comment_.
     Hashtag,
-    /// Everything else just is a literal word.
-    Word(&'s str),
+    /// Everything else is just text.
+    Text(&'s str),
 }
 
 /// A tree representation of the source.
@@ -62,8 +62,8 @@ pub enum Node {
     ToggleBold,
     /// Indicates that math mode was enabled/disabled.
     ToggleMath,
-    /// A literal word.
-    Word(String),
+    /// Literal text.
+    Text(String),
     /// A function invocation.
     Func(FuncCall),
 }
src/utility.rs (135 changed lines, file deleted)

@@ -1,135 +0,0 @@
-//! Utility functionality.
-
-use std::iter::Peekable;
-use std::str::Split;
-use unicode_xid::UnicodeXID;
-
-
-/// Types that can be splined.
-pub trait Splinor {
-    /// Returns an iterator over the substrings splitted by the pattern,
-    /// intertwined with the splinor.
-    ///
-    /// # Example
-    ///
-    /// ```ignore
-    /// #[derive(Debug, Copy, Clone, PartialEq)]
-    /// struct Space;
-    ///
-    /// let v: Vec<Splined<Space>> = "My airplane flies!".spline(" ", Space).collect();
-    /// assert_eq!(v, [
-    ///     Splined::Value("My"),
-    ///     Splined::Splinor(Space),
-    ///     Splined::Value("airplane"),
-    ///     Splined::Splinor(Space),
-    ///     Splined::Value("flies!"),
-    /// ]);
-    /// ```
-    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T>;
-}
-
-impl Splinor for str {
-    fn spline<'s, T: Clone>(&'s self, pat: &'s str, splinor: T) -> Spline<'s, T> {
-        Spline {
-            splinor: Splined::Splinor(splinor),
-            split: self.split(pat).peekable(),
-            next_splinor: false,
-        }
-    }
-}
-
-/// Iterator over splitted values and splinors.
-///
-/// Created by the [`spline`](Splinor::spline) function.
-#[derive(Debug, Clone)]
-pub struct Spline<'s, T> {
-    splinor: Splined<'s, T>,
-    split: Peekable<Split<'s, &'s str>>,
-    next_splinor: bool,
-}
-
-/// Represents either a splitted substring or a splinor.
-#[derive(Debug, Copy, Clone, Eq, PartialEq)]
-pub enum Splined<'s, T> {
-    /// A substring.
-    Value(&'s str),
-    /// An intertwined splinor.
-    Splinor(T),
-}
-
-impl<'s, T: Clone> Iterator for Spline<'s, T> {
-    type Item = Splined<'s, T>;
-
-    fn next(&mut self) -> Option<Splined<'s, T>> {
-        if self.next_splinor && self.split.peek().is_some() {
-            self.next_splinor = false;
-            return Some(self.splinor.clone());
-        } else {
-            self.next_splinor = true;
-            return Some(Splined::Value(self.split.next()?))
-        }
-    }
-}
-
-/// More useful functions on `str`'s.
-pub trait StrExt {
-    /// Whether self consists only of whitespace.
-    fn is_whitespace(&self) -> bool;
-
-    /// Whether this word is a valid unicode identifier.
-    fn is_identifier(&self) -> bool;
-}
-
-impl StrExt for str {
-    fn is_whitespace(&self) -> bool {
-        self.chars().all(|c| c.is_whitespace() && c != '\n')
-    }
-
-    fn is_identifier(&self) -> bool {
-        let mut chars = self.chars();
-
-        match chars.next() {
-            Some(c) if !UnicodeXID::is_xid_start(c) => return false,
-            None => return false,
-            _ => (),
-        }
-
-        while let Some(c) = chars.next() {
-            if !UnicodeXID::is_xid_continue(c) {
-                return false;
-            }
-        }
-
-        true
-    }
-}
-
-
-#[cfg(test)]
-mod splinor_tests {
-    use super::*;
-    use Splined::{Value as V, Splinor as S};
-
-    #[derive(Debug, Copy, Clone, PartialEq)]
-    enum Token { DoubleUnderscore }
-
-    fn test<T>(string: &str, pat: &str, splinor: T, vec: Vec<Splined<T>>)
-    where T: std::fmt::Debug + Clone + PartialEq {
-        assert_eq!(string.spline(pat, splinor).collect::<Vec<_>>(), vec);
-    }
-
-    #[test]
-    fn splinor() {
-        let s = S(Token::DoubleUnderscore);
-        test("__he__llo__world__", "__", Token::DoubleUnderscore,
-             vec![V(""), s, V("he"), s, V("llo"), s, V("world"), s, V("")]);
-        test("__Italic__", "__", Token::DoubleUnderscore,
-             vec![V(""), s, V("Italic"), s, V("")]);
-        test("Key__Value", "__", Token::DoubleUnderscore,
-             vec![V("Key"), s, V("Value")]);
-        test("__Start__NoEnd", "__", Token::DoubleUnderscore,
-             vec![V(""), s, V("Start"), s, V("NoEnd")]);
-        test("NoStart__End__", "__", Token::DoubleUnderscore,
-             vec![V("NoStart"), s, V("End"), s, V("")]);
-    }
-}
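Nothing replaces the deleted Spline machinery: since '_' is now a special character, the tokenizer splits double underscores out of a word directly while scanning, as the tokenize_double_underscore test shows. A sketch of the resulting token stream, under the same assumptions as the sketch at the top:

    use crate::parsing::Tokens;
    use crate::syntax::Token;

    fn demo() {
        let tokens: Vec<Token> = Tokens::new("he__llo").collect();
        assert_eq!(tokens, vec![
            Token::Text("he"),
            Token::DoubleUnderscore,
            Token::Text("llo"),
        ]);
    }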