Mirror of https://github.com/typst/typst, synced 2025-05-14 04:56:26 +08:00

Switch to unscanny

parent 2db4b603db
commit c5b3f8ee98
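
The gist of the change: the parser's hand-rolled character `Scanner` is deleted and every call site moves to the external `unscanny` crate, pulled in as a git dependency. Below is a minimal sketch of the method mapping, using only calls that appear in the patch; the function itself is illustrative and not part of the commit.

use unscanny::Scanner;

fn first_word(text: &str) -> &str {
    let mut s = Scanner::new(text);

    // Old scanner             ->  unscanny
    // s.index()               ->  s.cursor()
    // s.eaten() / s.rest()    ->  s.before() / s.after()
    // s.eaten_from(start)     ->  s.from(start)
    // s.eat_assert(c)         ->  s.expect(c)
    // s.eof()                 ->  s.done()
    // s.check_or(false, f)    ->  s.at(f)
    // s.prev(n)               ->  s.scout(-(n + 1))
    // s.last_index()          ->  s.locate(-1)
    // s.src()                 ->  s.string()

    let start = s.cursor();
    s.eat_while(char::is_ascii_alphanumeric);
    s.from(start)
}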

Cargo.lock (generated)
@@ -876,6 +876,7 @@ dependencies = [
  "unicode-script",
  "unicode-segmentation",
  "unicode-xid",
+ "unscanny",
  "usvg",
  "walkdir",
  "xi-unicode",
@@ -938,6 +939,11 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
 
+[[package]]
+name = "unscanny"
+version = "0.1.0"
+source = "git+https://github.com/typst/unscanny#c943791649841388803b7ca873ce72683903fd39"
+
 [[package]]
 name = "usvg"
 version = "0.20.0"

@@ -21,6 +21,7 @@ once_cell = "1"
 serde = { version = "1", features = ["derive"] }
 typed-arena = "2"
 parking_lot = "0.12"
+unscanny = { git = "https://github.com/typst/unscanny" }
 
 # Text and font handling
 hypher = "0.1"

@@ -1,9 +1,10 @@
 use std::path::Path;
 
 use iai::{black_box, main, Iai};
+use unscanny::Scanner;
 
 use typst::loading::MemLoader;
-use typst::parse::{parse, Scanner, TokenMode, Tokens};
+use typst::parse::{parse, TokenMode, Tokens};
 use typst::source::SourceId;
 use typst::Context;
 

@@ -1,8 +1,9 @@
+use unscanny::Scanner;
+
 use crate::library::layout::{GridNode, TrackSizing};
 use crate::library::prelude::*;
 use crate::library::text::ParNode;
 use crate::library::utility::Numbering;
-use crate::parse::Scanner;
 
 /// An unordered (bulleted) or ordered (numbered) list.
 #[derive(Debug, Hash)]
@@ -190,7 +191,7 @@ impl Cast<Spanned<Value>> for Label {
                 let mut s = Scanner::new(&pattern);
                 let mut prefix;
                 let numbering = loop {
-                    prefix = s.eaten();
+                    prefix = s.before();
                     match s.eat().map(|c| c.to_ascii_lowercase()) {
                         Some('1') => break Numbering::Arabic,
                         Some('a') => break Numbering::Letter,
@@ -200,8 +201,8 @@ impl Cast<Spanned<Value>> for Label {
                         None => Err("invalid pattern")?,
                     }
                 };
-                let upper = s.prev(0).map_or(false, char::is_uppercase);
-                let suffix = s.rest().into();
+                let upper = s.scout(-1).map_or(false, char::is_uppercase);
+                let suffix = s.after().into();
                 Ok(Self::Pattern(prefix.into(), numbering, upper, suffix))
             }
             Value::Content(v) => Ok(Self::Content(v)),

@@ -3,13 +3,11 @@
 mod incremental;
 mod parser;
 mod resolve;
-mod scanner;
 mod tokens;
 
 pub use incremental::*;
 pub use parser::*;
 pub use resolve::*;
-pub use scanner::*;
 pub use tokens::*;
 
 use std::collections::HashSet;

@@ -1,6 +1,6 @@
-use core::slice::SliceIndex;
 use std::fmt::{self, Display, Formatter};
 use std::mem;
+use std::ops::Range;
 
 use super::{TokenMode, Tokens};
 use crate::syntax::{ErrorPos, Green, GreenData, GreenNode, NodeKind};
@@ -116,7 +116,7 @@ impl<'s> Parser<'s> {
            _ => false,
        };
 
-        self.prev_end = self.tokens.index();
+        self.prev_end = self.tokens.cursor();
        self.bump();
 
        if self.tokens.mode() == TokenMode::Code {
@@ -186,15 +186,12 @@ impl<'s> Parser<'s> {
 
    /// Peek at the source of the current token.
    pub fn peek_src(&self) -> &'s str {
-        self.tokens.scanner().get(self.current_start() .. self.current_end())
+        self.get(self.current_start() .. self.current_end())
    }
 
    /// Obtain a range of the source code.
-    pub fn get<I>(&self, index: I) -> &'s str
-    where
-        I: SliceIndex<str, Output = str>,
-    {
-        self.tokens.scanner().get(index)
+    pub fn get(&self, range: Range<usize>) -> &'s str {
+        self.tokens.scanner().get(range)
    }
 
    /// The byte index at which the last non-trivia token ended.
@@ -209,7 +206,7 @@ impl<'s> Parser<'s> {
 
    /// The byte index at which the current token ends.
    pub fn current_end(&self) -> usize {
-        self.tokens.index()
+        self.tokens.cursor()
    }
 
    /// Determine the column index for the given byte index.
@@ -294,8 +291,8 @@ impl<'s> Parser<'s> {
        }
 
        self.tokens.jump(target);
-        self.prev_end = self.tokens.index();
-        self.current_start = self.tokens.index();
+        self.prev_end = self.tokens.cursor();
+        self.current_start = self.tokens.cursor();
        self.current = self.tokens.next();
    }
 
@@ -311,9 +308,9 @@ impl<'s> Parser<'s> {
    /// handling.
    fn bump(&mut self) {
        let kind = self.current.take().unwrap();
-        let len = self.tokens.index() - self.current_start;
+        let len = self.tokens.cursor() - self.current_start;
        self.children.push(GreenData::new(kind, len).into());
-        self.current_start = self.tokens.index();
+        self.current_start = self.tokens.cursor();
        self.current = self.tokens.next();
    }
 

@@ -1,4 +1,6 @@
-use super::{is_ident, is_newline, Scanner};
+use unscanny::Scanner;
+
+use super::{is_ident, is_newline};
 use crate::syntax::ast::RawNode;
 use crate::util::EcoString;
 
@@ -13,7 +15,7 @@ pub fn resolve_string(string: &str) -> EcoString {
            continue;
        }
 
-        let start = s.last_index();
+        let start = s.locate(-1);
        match s.eat() {
            Some('\\') => out.push('\\'),
            Some('"') => out.push('"'),
@@ -22,17 +24,17 @@ pub fn resolve_string(string: &str) -> EcoString {
            Some('t') => out.push('\t'),
            Some('u') if s.eat_if('{') => {
                // TODO: Feedback if closing brace is missing.
-                let sequence = s.eat_while(|c| c.is_ascii_hexdigit());
+                let sequence = s.eat_while(char::is_ascii_hexdigit);
                let _terminated = s.eat_if('}');
 
                match resolve_hex(sequence) {
                    Some(c) => out.push(c),
-                    None => out.push_str(s.eaten_from(start)),
+                    None => out.push_str(s.from(start)),
                }
            }
 
            // TODO: Feedback about invalid escape sequence.
-            _ => out.push_str(s.eaten_from(start)),
+            _ => out.push_str(s.from(start)),
        }
    }
 
@@ -68,8 +70,8 @@ pub fn resolve_raw(column: usize, backticks: usize, text: &str) -> RawNode {
 fn split_at_lang_tag(raw: &str) -> (&str, &str) {
    let mut s = Scanner::new(raw);
    (
-        s.eat_until(|c| c == '`' || c.is_whitespace() || is_newline(c)),
-        s.rest(),
+        s.eat_until(|c: char| c == '`' || c.is_whitespace() || is_newline(c)),
+        s.after(),
    )
 }
 
@@ -129,9 +131,9 @@ fn split_lines(text: &str) -> Vec<&str> {
            }
 
            lines.push(&text[start .. end]);
-            start = s.index();
+            start = s.cursor();
        }
-        end = s.index();
+        end = s.cursor();
    }
 
    lines.push(&text[start ..]);
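
One knock-on effect visible above: because `unscanny` accepts a generic pattern argument, bare closures sometimes need an explicit `char` annotation (`|c: char| ...`) where the old concrete `Scanner` let inference fill it in. A standalone sketch of the `split_at_lang_tag` shape, assuming nothing beyond the calls shown above:

use unscanny::Scanner;

fn split_tag(raw: &str) -> (&str, &str) {
    let mut s = Scanner::new(raw);
    // The closure parameter is annotated because the pattern is generic.
    let tag = s.eat_until(|c: char| c == '`' || c.is_whitespace());
    (tag, s.after())
}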

@@ -1,211 +0,0 @@
-use std::slice::SliceIndex;
-
-use unicode_xid::UnicodeXID;
-
-/// A featureful char-based scanner.
-#[derive(Copy, Clone)]
-pub struct Scanner<'s> {
-    /// The string to scan.
-    src: &'s str,
-    /// The index at which the peekable character starts. Must be in bounds and
-    /// at a codepoint boundary to guarantee safety.
-    index: usize,
-}
-
-impl<'s> Scanner<'s> {
-    /// Create a new char scanner.
-    #[inline]
-    pub fn new(src: &'s str) -> Self {
-        Self { src, index: 0 }
-    }
-
-    /// Whether the end of the string is reached.
-    pub fn eof(&self) -> bool {
-        self.index == self.src.len()
-    }
-
-    /// Consume the next char.
-    #[inline]
-    pub fn eat(&mut self) -> Option<char> {
-        let next = self.peek();
-        if let Some(c) = next {
-            self.index += c.len_utf8();
-        }
-        next
-    }
-
-    /// Consume the next char if it is the given one.
-    ///
-    /// Returns whether the char was consumed.
-    #[inline]
-    pub fn eat_if(&mut self, c: char) -> bool {
-        let matches = self.peek() == Some(c);
-        if matches {
-            self.index += c.len_utf8();
-        }
-        matches
-    }
-
-    /// Consume the next char, debug-asserting that it is the given one.
-    #[inline]
-    pub fn eat_assert(&mut self, c: char) {
-        let next = self.eat();
-        debug_assert_eq!(next, Some(c));
-    }
-
-    /// Eat chars while the condition is true.
-    #[inline]
-    pub fn eat_while<F>(&mut self, mut f: F) -> &'s str
-    where
-        F: FnMut(char) -> bool,
-    {
-        self.eat_until(|c| !f(c))
-    }
-
-    /// Eat chars until the condition is true.
-    #[inline]
-    pub fn eat_until<F>(&mut self, mut f: F) -> &'s str
-    where
-        F: FnMut(char) -> bool,
-    {
-        let start = self.index;
-        while let Some(c) = self.peek() {
-            if f(c) {
-                break;
-            }
-            self.index += c.len_utf8();
-        }
-        self.eaten_from(start)
-    }
-
-    /// Uneat the last eaten char.
-    #[inline]
-    pub fn uneat(&mut self) {
-        self.index = self.last_index();
-    }
-
-    /// Peek at the next char without consuming it.
-    #[inline]
-    pub fn peek(&self) -> Option<char> {
-        self.rest().chars().next()
-    }
-
-    /// Get the nth-previous eaten char.
-    #[inline]
-    pub fn prev(&self, n: usize) -> Option<char> {
-        self.eaten().chars().nth_back(n)
-    }
-
-    /// Checks whether the next char fulfills a condition.
-    ///
-    /// Returns `default` if there is no next char.
-    #[inline]
-    pub fn check_or<F>(&self, default: bool, f: F) -> bool
-    where
-        F: FnOnce(char) -> bool,
-    {
-        self.peek().map_or(default, f)
-    }
-
-    /// The previous index in the source string.
-    #[inline]
-    pub fn last_index(&self) -> usize {
-        self.eaten().chars().last().map_or(0, |c| self.index - c.len_utf8())
-    }
-
-    /// The current index in the source string.
-    #[inline]
-    pub fn index(&self) -> usize {
-        self.index
-    }
-
-    /// Jump to an index in the source string.
-    #[inline]
-    pub fn jump(&mut self, index: usize) {
-        // Make sure that the index is in bounds and on a codepoint boundary.
-        self.src.get(index ..).expect("jumped to invalid index");
-        self.index = index;
-    }
-
-    /// The full source string.
-    #[inline]
-    pub fn src(&self) -> &'s str {
-        self.src
-    }
-
-    /// Slice out part of the source string.
-    #[inline]
-    pub fn get<I>(&self, index: I) -> &'s str
-    where
-        I: SliceIndex<str, Output = str>,
-    {
-        // See `eaten_from` for details about `unwrap_or_default`.
-        self.src.get(index).unwrap_or_default()
-    }
-
-    /// The remaining source string after the current index.
-    #[inline]
-    pub fn rest(&self) -> &'s str {
-        // Safety: The index is always in bounds and on a codepoint boundary
-        // since it starts at zero and is:
-        // - either increased by the length of a scanned character, advancing
-        //   from one codepoint boundary to the next,
-        // - or checked upon jumping.
-        unsafe { self.src.get_unchecked(self.index ..) }
-    }
-
-    /// The full source string up to the current index.
-    #[inline]
-    pub fn eaten(&self) -> &'s str {
-        // Safety: The index is always okay, for details see `rest()`.
-        unsafe { self.src.get_unchecked(.. self.index) }
-    }
-
-    /// The source string from `start` to the current index.
-    #[inline]
-    pub fn eaten_from(&self, start: usize) -> &'s str {
-        // Using `unwrap_or_default` is much faster than unwrap, probably
-        // because then the whole call to `eaten_from` is pure and can be
-        // optimized away in some cases.
-        self.src.get(start .. self.index).unwrap_or_default()
-    }
-}
-
-/// Whether this character denotes a newline.
-#[inline]
-pub fn is_newline(character: char) -> bool {
-    matches!(
-        character,
-        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
-        '\n' | '\x0B' | '\x0C' | '\r' |
-        // Next Line, Line Separator, Paragraph Separator.
-        '\u{0085}' | '\u{2028}' | '\u{2029}'
-    )
-}
-
-/// Whether a string is a valid unicode identifier.
-///
-/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
-/// - `_` as a starting character,
-/// - `_` and `-` as continuing characters.
-///
-/// [uax31]: http://www.unicode.org/reports/tr31/
-#[inline]
-pub fn is_ident(string: &str) -> bool {
-    let mut chars = string.chars();
-    chars
-        .next()
-        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
-}
-
-/// Whether a character can start an identifier.
-#[inline]
-pub fn is_id_start(c: char) -> bool {
-    c.is_xid_start() || c == '_'
-}
-
-/// Whether a character can continue an identifier.
-#[inline]
-pub fn is_id_continue(c: char) -> bool {
-    c.is_xid_continue() || c == '_' || c == '-'
-}
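
Only the scanner type itself disappears: the character classification helpers (`is_newline`, `is_ident`, `is_id_start`, `is_id_continue`) reappear at the bottom of the tokenizer below. The old `check_or(default, f)` idiom has no direct counterpart and is spelled out with `done()` and `at()` instead. A standalone sketch of that rewrite, not code from the commit:

use unscanny::Scanner;

fn eat_space(s: &mut Scanner) -> bool {
    // Old: s.eat_if(' ') && s.check_or(true, |c| !c.is_whitespace())
    // New: a lone space counts only at the end of input or when the next
    //      char is not further whitespace.
    s.eat_if(' ') && (s.done() || !s.at(char::is_whitespace))
}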

@@ -1,9 +1,9 @@
 use std::sync::Arc;
 
-use super::{
-    is_id_continue, is_id_start, is_newline, resolve_hex, resolve_raw, resolve_string,
-    Scanner,
-};
+use unicode_xid::UnicodeXID;
+use unscanny::Scanner;
+
+use super::{resolve_hex, resolve_raw, resolve_string};
 use crate::geom::{AngleUnit, LengthUnit};
 use crate::syntax::ast::{MathNode, RawNode, Unit};
 use crate::syntax::{ErrorPos, NodeKind};
@@ -65,13 +65,11 @@ impl<'s> Tokens<'s> {
    /// The index in the string at which the last token ends and next token
    /// will start.
    #[inline]
-    pub fn index(&self) -> usize {
-        self.s.index()
+    pub fn cursor(&self) -> usize {
+        self.s.cursor()
    }
 
    /// Jump to the given index in the string.
-    ///
-    /// You need to know the correct column.
    #[inline]
    pub fn jump(&mut self, index: usize) {
        self.s.jump(index);
@@ -92,7 +90,7 @@ impl<'s> Tokens<'s> {
    /// The column index of a given index in the source string.
    #[inline]
    pub fn column(&self, index: usize) -> usize {
-        column(self.s.src(), index, self.column_offset)
+        column(self.s.string(), index, self.column_offset)
    }
 }
 
@@ -102,7 +100,7 @@ impl<'s> Iterator for Tokens<'s> {
    /// Parse the next token in the source code.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
-        let start = self.s.index();
+        let start = self.s.cursor();
        let c = self.s.eat()?;
        Some(match c {
            // Blocks.
@@ -112,15 +110,13 @@ impl<'s> Iterator for Tokens<'s> {
            ']' => NodeKind::RightBracket,
 
            // Whitespace.
-            ' ' if self.s.check_or(true, |c| !c.is_whitespace()) => NodeKind::Space(0),
+            ' ' if self.s.done() || !self.s.at(char::is_whitespace) => NodeKind::Space(0),
            c if c.is_whitespace() => self.whitespace(),
 
            // Comments with special case for URLs.
            '/' if self.s.eat_if('*') => self.block_comment(),
            '/' if !self.maybe_in_url() && self.s.eat_if('/') => self.line_comment(),
-            '*' if self.s.eat_if('/') => {
-                NodeKind::Unknown(self.s.eaten_from(start).into())
-            }
+            '*' if self.s.eat_if('/') => NodeKind::Unknown(self.s.from(start).into()),
 
            // Other things.
            _ => match self.mode {
@@ -187,22 +183,20 @@ impl<'s> Tokens<'s> {
            '=' => NodeKind::Eq,
            '<' => NodeKind::Lt,
            '>' => NodeKind::Gt,
-            '.' if self.s.check_or(true, |n| !n.is_ascii_digit()) => NodeKind::Dot,
+            '.' if self.s.done() || !self.s.at(char::is_ascii_digit) => NodeKind::Dot,
 
            // Identifiers.
            c if is_id_start(c) => self.ident(start),
 
            // Numbers.
-            c if c.is_ascii_digit()
-                || (c == '.' && self.s.check_or(false, |n| n.is_ascii_digit())) =>
-            {
+            c if c.is_ascii_digit() || (c == '.' && self.s.at(char::is_ascii_digit)) => {
                self.number(start, c)
            }
 
            // Strings.
            '"' => self.string(),
 
-            _ => NodeKind::Unknown(self.s.eaten_from(start).into()),
+            _ => NodeKind::Unknown(self.s.from(start).into()),
        }
    }
 
@@ -226,19 +220,19 @@ impl<'s> Tokens<'s> {
        };
 
        loop {
-            self.s.eat_until(|c| {
+            self.s.eat_until(|c: char| {
                TABLE.get(c as usize).copied().unwrap_or_else(|| c.is_whitespace())
            });
 
            let mut s = self.s;
-            if !(s.eat_if(' ') && s.check_or(false, char::is_alphanumeric)) {
+            if !(s.eat_if(' ') && s.at(char::is_alphanumeric)) {
                break;
            }
 
            self.s.eat();
        }
 
-        NodeKind::Text(self.s.eaten_from(start).into())
+        NodeKind::Text(self.s.from(start).into())
    }
 
    fn whitespace(&mut self) -> NodeKind {
@@ -276,13 +270,11 @@ impl<'s> Tokens<'s> {
            '[' | ']' | '{' | '}' | '#' |
            // Markup.
            '~' | '\'' | '"' | '*' | '_' | '`' | '$' | '=' | '-' | '.' => {
-                self.s.eat_assert(c);
+                self.s.expect(c);
                NodeKind::Escape(c)
            }
-            'u' if self.s.rest().starts_with("u{") => {
-                self.s.eat_assert('u');
-                self.s.eat_assert('{');
-                let sequence = self.s.eat_while(|c| c.is_ascii_alphanumeric());
+            'u' if self.s.eat_if("u{") => {
+                let sequence = self.s.eat_while(char::is_ascii_alphanumeric);
                if self.s.eat_if('}') {
                    if let Some(c) = resolve_hex(sequence) {
                        NodeKind::Escape(c)
@@ -304,7 +296,7 @@ impl<'s> Tokens<'s> {
            // Linebreaks.
            c if c.is_whitespace() => NodeKind::Linebreak(false),
            '+' => {
-                self.s.eat_assert(c);
+                self.s.expect(c);
                NodeKind::Linebreak(true)
            }
 
@@ -315,7 +307,7 @@ impl<'s> Tokens<'s> {
 
    #[inline]
    fn hash(&mut self) -> NodeKind {
-        if self.s.check_or(false, is_id_start) {
+        if self.s.at(is_id_start) {
            let read = self.s.eat_while(is_id_continue);
            match keyword(read) {
                Some(keyword) => keyword,
@@ -342,10 +334,10 @@ impl<'s> Tokens<'s> {
 
    fn numbering(&mut self, start: usize, c: char) -> NodeKind {
        let number = if c != '.' {
-            self.s.eat_while(|c| c.is_ascii_digit());
-            let read = self.s.eaten_from(start);
+            self.s.eat_while(char::is_ascii_digit);
+            let read = self.s.from(start);
            if !self.s.eat_if('.') {
-                return NodeKind::Text(self.s.eaten_from(start).into());
+                return NodeKind::Text(self.s.from(start).into());
            }
            read.parse().ok()
        } else {
@@ -356,7 +348,7 @@ impl<'s> Tokens<'s> {
    }
 
    fn raw(&mut self) -> NodeKind {
-        let column = self.column(self.s.index() - 1);
+        let column = self.column(self.s.cursor() - 1);
 
        let mut backticks = 1;
        while self.s.eat_if('`') {
@@ -372,7 +364,7 @@ impl<'s> Tokens<'s> {
            }));
        }
 
-        let start = self.s.index();
+        let start = self.s.cursor();
 
        let mut found = 0;
        while found < backticks {
@@ -384,7 +376,7 @@ impl<'s> Tokens<'s> {
        }
 
        if found == backticks {
-            let end = self.s.index() - found as usize;
+            let end = self.s.cursor() - found as usize;
            NodeKind::Raw(Arc::new(resolve_raw(
                column,
                backticks,
@@ -412,7 +404,7 @@ impl<'s> Tokens<'s> {
            display = true;
        }
 
-        let start = self.s.index();
+        let start = self.s.cursor();
 
        let mut escaped = false;
        let mut dollar = !display;
@@ -429,7 +421,7 @@ impl<'s> Tokens<'s> {
            }
        };
 
-        let end = self.s.index()
+        let end = self.s.cursor()
            - match (terminated, display) {
                (false, _) => 0,
                (true, false) => 1,
@@ -456,7 +448,7 @@ impl<'s> Tokens<'s> {
 
    fn ident(&mut self, start: usize) -> NodeKind {
        self.s.eat_while(is_id_continue);
-        match self.s.eaten_from(start) {
+        match self.s.from(start) {
            "none" => NodeKind::None,
            "auto" => NodeKind::Auto,
            "true" => NodeKind::Bool(true),
@@ -467,30 +459,29 @@ impl<'s> Tokens<'s> {
 
    fn number(&mut self, start: usize, c: char) -> NodeKind {
        // Read the first part (integer or fractional depending on `first`).
-        self.s.eat_while(|c| c.is_ascii_digit());
+        self.s.eat_while(char::is_ascii_digit);
 
        // Read the fractional part if not already done.
        // Make sure not to confuse a range for the decimal separator.
-        if c != '.' && !self.s.rest().starts_with("..") && self.s.eat_if('.') {
-            self.s.eat_while(|c| c.is_ascii_digit());
+        if c != '.' && !self.s.at("..") && self.s.eat_if('.') {
+            self.s.eat_while(char::is_ascii_digit);
        }
 
        // Read the exponent.
-        let em = self.s.rest().starts_with("em");
-        if !em && self.s.eat_if('e') || self.s.eat_if('E') {
-            let _ = self.s.eat_if('+') || self.s.eat_if('-');
-            self.s.eat_while(|c| c.is_ascii_digit());
+        if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
+            self.s.eat_if(['+', '-']);
+            self.s.eat_while(char::is_ascii_digit);
        }
 
        // Read the suffix.
-        let suffix_start = self.s.index();
+        let suffix_start = self.s.cursor();
        if !self.s.eat_if('%') {
-            self.s.eat_while(|c| c.is_ascii_alphanumeric());
+            self.s.eat_while(char::is_ascii_alphanumeric);
        }
 
        let number = self.s.get(start .. suffix_start);
-        let suffix = self.s.eaten_from(suffix_start);
-        let all = self.s.eaten_from(start);
+        let suffix = self.s.from(suffix_start);
+        let all = self.s.from(start);
 
        // Find out whether it is a simple number.
        if suffix.is_empty() {
@@ -575,13 +566,13 @@ impl<'s> Tokens<'s> {
 
    fn in_word(&self) -> bool {
        let alphanumeric = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric());
-        let prev = self.s.prev(1);
+        let prev = self.s.scout(-2);
        let next = self.s.peek();
        alphanumeric(prev) && alphanumeric(next)
    }
 
    fn maybe_in_url(&self) -> bool {
-        self.mode == TokenMode::Markup && self.s.eaten().ends_with(":/")
+        self.mode == TokenMode::Markup && self.s.before().ends_with(":/")
    }
 }
 
@@ -610,7 +601,8 @@ fn keyword(ident: &str) -> Option<NodeKind> {
    })
 }
 
-/// The column index of a given index in the source string, given a column offset for the first line.
+/// The column index of a given index in the source string, given a column
+/// offset for the first line.
 #[inline]
 fn column(string: &str, index: usize, offset: usize) -> usize {
    let mut apply_offset = false;
@@ -634,6 +626,45 @@ fn column(string: &str, index: usize, offset: usize) -> usize {
    if apply_offset { res + offset } else { res }
 }
 
+/// Whether this character denotes a newline.
+#[inline]
+pub fn is_newline(character: char) -> bool {
+    matches!(
+        character,
+        // Line Feed, Vertical Tab, Form Feed, Carriage Return.
+        '\n' | '\x0B' | '\x0C' | '\r' |
+        // Next Line, Line Separator, Paragraph Separator.
+        '\u{0085}' | '\u{2028}' | '\u{2029}'
+    )
+}
+
+/// Whether a string is a valid unicode identifier.
+///
+/// In addition to what is specified in the [Unicode Standard][uax31], we allow:
+/// - `_` as a starting character,
+/// - `_` and `-` as continuing characters.
+///
+/// [uax31]: http://www.unicode.org/reports/tr31/
+#[inline]
+pub fn is_ident(string: &str) -> bool {
+    let mut chars = string.chars();
+    chars
+        .next()
+        .map_or(false, |c| is_id_start(c) && chars.all(is_id_continue))
+}
+
+/// Whether a character can start an identifier.
+#[inline]
+pub fn is_id_start(c: char) -> bool {
+    c.is_xid_start() || c == '_'
+}
+
+/// Whether a character can continue an identifier.
+#[inline]
+pub fn is_id_continue(c: char) -> bool {
+    c.is_xid_continue() || c == '_' || c == '-'
+}
+
 #[cfg(test)]
 #[allow(non_snake_case)]
 mod tests {
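
The tokenizer above also leans on `unscanny`'s pattern arguments: `eat_if` and `at` accept single chars, char arrays, string literals, and `char` predicates alike. A standalone sketch of the number-scanning shape, using only calls that appear in the hunks above:

use unscanny::Scanner;

fn scan_number(src: &str) -> &str {
    let mut s = Scanner::new(src);
    let start = s.cursor();
    s.eat_while(char::is_ascii_digit);
    // A decimal point, but not the start of a `..` range.
    if !s.at("..") && s.eat_if('.') {
        s.eat_while(char::is_ascii_digit);
    }
    // An optional exponent such as `2e-3`.
    if s.eat_if(['e', 'E']) {
        s.eat_if(['+', '-']);
        s.eat_while(char::is_ascii_digit);
    }
    s.from(start)
}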

@@ -6,9 +6,11 @@ use std::ops::Range;
 use std::path::{Path, PathBuf};
 use std::sync::Arc;
 
+use unscanny::Scanner;
+
 use crate::diag::TypResult;
 use crate::loading::{FileHash, Loader};
-use crate::parse::{is_newline, parse, Reparser, Scanner};
+use crate::parse::{is_newline, parse, Reparser};
 use crate::syntax::ast::Markup;
 use crate::syntax::{self, Category, GreenNode, RedNode};
 use crate::util::{PathExt, StrExt};
@@ -382,12 +384,12 @@ impl Line {
        let mut utf16_idx = utf16_offset;
 
        std::iter::from_fn(move || {
-            s.eat_until(|c| {
+            s.eat_until(|c: char| {
                utf16_idx += c.len_utf16();
                is_newline(c)
            });
 
-            if s.eof() {
+            if s.done() {
                return None;
            }
 
@@ -396,7 +398,7 @@ impl Line {
            }
 
            Some(Line {
-                byte_idx: byte_offset + s.index(),
+                byte_idx: byte_offset + s.cursor(),
                utf16_idx,
            })
        })

@@ -6,6 +6,7 @@ use std::path::Path;
 use std::sync::Arc;
 
 use tiny_skia as sk;
+use unscanny::Scanner;
 use walkdir::WalkDir;
 
 use typst::diag::Error;
@@ -15,7 +16,6 @@ use typst::geom::{Length, RgbaColor};
 use typst::library::layout::PageNode;
 use typst::library::text::{TextNode, TextSize};
 use typst::loading::FsLoader;
-use typst::parse::Scanner;
 use typst::source::SourceFile;
 use typst::syntax::Span;
 use typst::{bail, Context};
@@ -329,7 +329,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
    };
 
    fn num(s: &mut Scanner) -> usize {
-        s.eat_while(|c| c.is_numeric()).parse().unwrap()
+        s.eat_while(char::is_numeric).parse().unwrap()
    }
 
    let comments =
@@ -348,7 +348,7 @@ fn parse_metadata(source: &SourceFile) -> (Option<bool>, Vec<Error>) {
        let end = if s.eat_if('-') { pos(&mut s) } else { start };
        let span = Span::new(source.id(), start, end);
 
-        errors.push(Error::new(span, s.rest().trim()));
+        errors.push(Error::new(span, s.after().trim()));
    }
 
    (compare_ref, errors)