Code Review: Life is Like a Box of Iterators

Martin Haug 2021-11-01 13:03:18 +01:00
parent 7d34a548cc
commit 49fb3cd4e2
9 changed files with 150 additions and 142 deletions

View File

@@ -6,7 +6,7 @@ use typst::eval::eval;
use typst::layout::layout;
use typst::loading::MemLoader;
use typst::parse::{parse, Scanner, TokenMode, Tokens};
use typst::source::{SourceFile, SourceId};
use typst::source::SourceId;
use typst::Context;
const SRC: &str = include_str!("bench.typ");
@@ -44,13 +44,11 @@ fn bench_scan(iai: &mut Iai) {
}
fn bench_tokenize(iai: &mut Iai) {
let src = SourceFile::detached(SRC);
iai.run(|| Tokens::new(black_box(&src), black_box(TokenMode::Markup)).count());
iai.run(|| Tokens::new(black_box(&SRC), black_box(TokenMode::Markup)).count());
}
fn bench_parse(iai: &mut Iai) {
let src = SourceFile::detached(SRC);
iai.run(|| parse(&src));
iai.run(|| parse(&SRC));
}
fn bench_eval(iai: &mut Iai) {

View File

@@ -12,12 +12,11 @@ pub use tokens::*;
use std::rc::Rc;
use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
/// Parse a source file.
pub fn parse(source: &SourceFile) -> Rc<GreenNode> {
pub fn parse(source: &str) -> Rc<GreenNode> {
let mut p = Parser::new(source);
markup(&mut p);
p.finish()

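Reviewer note: parse now borrows a plain &str instead of a &SourceFile, so a green tree can be built from any string slice without constructing a source file first. A minimal sketch of the new call shape, using only the public parse function that the benchmark above already imports:

use typst::parse::parse;

fn main() {
    // `parse` takes any string slice and returns the reference-counted
    // green root, per the signature shown in this hunk.
    let _root = parse("Some *markup* text.");
}
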
View File

@@ -1,15 +1,14 @@
use std::ops::Range;
use std::rc::Rc;
use super::{TokenMode, Tokens};
use crate::source::{SourceFile, SourceId};
use super::{is_newline, TokenMode, Tokens};
use crate::syntax::{ErrorPosition, Green, GreenData, GreenNode, NodeKind};
use crate::util::EcoString;
/// A convenient token-based parser.
pub struct Parser<'s> {
/// The parsed file.
source: &'s SourceFile,
src: &'s str,
/// An iterator over the source tokens.
tokens: Tokens<'s>,
/// The stack of open groups.
@@ -61,11 +60,11 @@ pub enum Group {
impl<'s> Parser<'s> {
/// Create a new parser for the source string.
pub fn new(source: &'s SourceFile) -> Self {
let mut tokens = Tokens::new(source, TokenMode::Markup);
pub fn new(src: &'s str) -> Self {
let mut tokens = Tokens::new(src, TokenMode::Markup);
let next = tokens.next();
Self {
source,
src,
tokens,
groups: vec![],
next: next.clone(),
@@ -78,11 +77,6 @@ impl<'s> Parser<'s> {
}
}
/// The id of the parsed source file.
pub fn id(&self) -> SourceId {
self.source.id()
}
/// Start a nested node.
///
/// Each start call has to be matched with a call to `end`,
@@ -366,12 +360,16 @@ impl<'s> Parser<'s> {
/// Determine the column index for the given byte index.
pub fn column(&self, index: usize) -> usize {
self.source.byte_to_column(index).unwrap()
self.src[.. index]
.chars()
.rev()
.take_while(|&c| !is_newline(c))
.count()
}
/// Slice out part of the source string.
pub fn get(&self, range: Range<usize>) -> &'s str {
self.source.get(range).unwrap()
self.src.get(range).unwrap()
}
/// Continue parsing in a group.

View File

@@ -1,5 +1,5 @@
use super::{is_newline, Scanner};
use crate::syntax::RawToken;
use crate::syntax::RawData;
use crate::util::EcoString;
/// Resolve all escape sequences in a string.
@@ -46,18 +46,18 @@ pub fn resolve_hex(sequence: &str) -> Option<char> {
}
/// Resolve the language tag and trim the raw text.
pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawToken {
pub fn resolve_raw(column: usize, backticks: u8, text: &str) -> RawData {
if backticks > 1 {
let (tag, inner) = split_at_lang_tag(text);
let (text, block) = trim_and_split_raw(column, inner);
RawToken {
RawData {
lang: Some(tag.into()),
text: text.into(),
backticks,
block,
}
} else {
RawToken {
RawData {
lang: None,
text: split_lines(text).join("\n").into(),
backticks,

View File

@@ -106,6 +106,16 @@ impl<'s> Scanner<'s> {
self.index
}
/// The column index of a given index in the source string.
#[inline]
pub fn column(&self, index: usize) -> usize {
self.src[.. index]
.chars()
.rev()
.take_while(|&c| !is_newline(c))
.count()
}
/// Jump to an index in the source string.
#[inline]
pub fn jump(&mut self, index: usize) {

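Reviewer note: the new Scanner::column and the rewritten Parser::column above compute the column the same way, by walking characters backwards from the byte index until the previous line break. A standalone sketch of that logic; is_newline here is a simplified stand-in for the crate's helper, and the index is assumed to lie on a char boundary:

/// Count the characters between `index` and the preceding line break.
fn column(src: &str, index: usize) -> usize {
    // Simplified stand-in for the crate's `is_newline`.
    fn is_newline(c: char) -> bool {
        matches!(c, '\n' | '\r')
    }

    src[.. index]
        .chars()
        .rev()
        .take_while(|&c| !is_newline(c))
        .count()
}

fn main() {
    assert_eq!(column("abc\ndef", 6), 2); // two chars ("de") after the newline
    assert_eq!(column("abc", 0), 0);      // start of input
}

Since the same reverse walk now appears in both Parser and Scanner, the parser's version could probably delegate to the scanner's to avoid the duplication.
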
View File

@@ -1,7 +1,6 @@
use super::{is_newline, resolve_raw, Scanner};
use crate::geom::{AngularUnit, LengthUnit};
use crate::parse::resolve::{resolve_hex, resolve_string};
use crate::source::SourceFile;
use crate::syntax::*;
use crate::util::EcoString;
@@ -9,7 +8,6 @@ use std::rc::Rc;
/// An iterator over the tokens of a string of source code.
pub struct Tokens<'s> {
source: &'s SourceFile,
s: Scanner<'s>,
mode: TokenMode,
}
@@ -26,12 +24,8 @@ pub enum TokenMode {
impl<'s> Tokens<'s> {
/// Create a new token iterator with the given mode.
#[inline]
pub fn new(source: &'s SourceFile, mode: TokenMode) -> Self {
Self {
s: Scanner::new(source.src()),
source,
mode,
}
pub fn new(source: &'s str, mode: TokenMode) -> Self {
Self { s: Scanner::new(source), mode }
}
/// Get the current token mode.
@@ -244,7 +238,7 @@ impl<'s> Tokens<'s> {
if self.s.eat_if('}') {
if let Some(character) = resolve_hex(&sequence) {
NodeKind::UnicodeEscape(UnicodeEscapeToken {
NodeKind::UnicodeEscape(UnicodeEscapeData {
character,
})
} else {
@@ -314,7 +308,7 @@ impl<'s> Tokens<'s> {
}
fn raw(&mut self) -> NodeKind {
let column = self.source.byte_to_column(self.s.index() - 1).unwrap();
let column = self.s.column(self.s.index() - 1);
let mut backticks = 1;
while self.s.eat_if('`') && backticks < u8::MAX {
backticks += 1;
@@ -322,7 +316,7 @@ impl<'s> Tokens<'s> {
// Special case for empty inline block.
if backticks == 2 {
return NodeKind::Raw(Rc::new(RawToken {
return NodeKind::Raw(Rc::new(RawData {
text: EcoString::new(),
lang: None,
backticks: 1,
@@ -397,7 +391,7 @@ impl<'s> Tokens<'s> {
};
if terminated {
NodeKind::Math(Rc::new(MathToken {
NodeKind::Math(Rc::new(MathData {
formula: self.s.get(start .. end).into(),
display,
}))
@@ -492,7 +486,7 @@ impl<'s> Tokens<'s> {
}
}));
if self.s.eat_if('"') {
NodeKind::Str(StrToken { string })
NodeKind::Str(StrData { string })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -567,7 +561,7 @@ mod tests {
use TokenMode::{Code, Markup};
fn UnicodeEscape(character: char) -> NodeKind {
NodeKind::UnicodeEscape(UnicodeEscapeToken { character })
NodeKind::UnicodeEscape(UnicodeEscapeData { character })
}
fn Error(pos: ErrorPosition, message: &str) -> NodeKind {
@@ -575,7 +569,7 @@ mod tests {
}
fn Raw(text: &str, lang: Option<&str>, backticks_left: u8, block: bool) -> NodeKind {
NodeKind::Raw(Rc::new(RawToken {
NodeKind::Raw(Rc::new(RawData {
text: text.into(),
lang: lang.map(Into::into),
backticks: backticks_left,
@@ -586,7 +580,7 @@ mod tests {
fn Math(formula: &str, display: bool, err_msg: Option<&str>) -> NodeKind {
match err_msg {
None => {
NodeKind::Math(Rc::new(MathToken { formula: formula.into(), display }))
NodeKind::Math(Rc::new(MathData { formula: formula.into(), display }))
}
Some(msg) => NodeKind::Error(
ErrorPosition::End,
@@ -597,7 +591,7 @@ mod tests {
fn Str(string: &str, terminated: bool) -> NodeKind {
if terminated {
NodeKind::Str(StrToken { string: string.into() })
NodeKind::Str(StrData { string: string.into() })
} else {
NodeKind::Error(ErrorPosition::End, "expected quote".into())
}
@@ -687,7 +681,7 @@ mod tests {
}};
(@$mode:ident: $src:expr => $($token:expr),*) => {{
let src = $src;
let found = Tokens::new(&SourceFile::detached(src.clone()), $mode).collect::<Vec<_>>();
let found = Tokens::new(&src, $mode).collect::<Vec<_>>();
let expected = vec![$($token.clone()),*];
check(&src, found, expected);
}};

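Reviewer note: with SourceFile out of the tokenizer, the test macro and the benchmark both hand the raw string straight to Tokens. A small sketch of driving the tokenizer directly over a string, assuming the re-exports the benchmark at the top of this commit already uses:

use typst::parse::{TokenMode, Tokens};

fn main() {
    // `Tokens` is an iterator over `NodeKind` values, which the token tests
    // above collect and compare directly.
    for token in Tokens::new("*strong* text", TokenMode::Markup) {
        println!("{:?}", token);
    }
}
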
View File

@@ -8,10 +8,10 @@ use std::rc::Rc;
use serde::{Deserialize, Serialize};
use crate::diag::{Error, TypResult};
use crate::diag::TypResult;
use crate::loading::{FileHash, Loader};
use crate::parse::{is_newline, parse, Scanner};
use crate::syntax::{GreenNode, Markup, NodeKind, RedNode};
use crate::syntax::{GreenNode, Markup, RedNode};
use crate::util::PathExt;
#[cfg(feature = "codespan-reporting")]
@@ -134,28 +134,22 @@ impl SourceFile {
pub fn new(id: SourceId, path: &Path, src: String) -> Self {
let mut line_starts = vec![0];
line_starts.extend(newlines(&src));
let mut init = Self {
Self {
id,
path: path.normalize(),
root: parse(&src),
src,
line_starts,
root: Rc::new(GreenNode::new(NodeKind::Markup, 0)),
};
let root = parse(&init);
init.root = root;
init
}
}
pub fn ast(&self) -> TypResult<Markup> {
let red = RedNode::new_root(self.root.clone(), self.id);
let errors = red.errors();
if errors.is_empty() {
Ok(red.as_ref().cast().unwrap())
Ok(red.cast().unwrap())
} else {
Err(Box::new(
errors.into_iter().map(|(span, msg)| Error::new(span, msg)).collect(),
))
Err(Box::new(errors))
}
}

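Reviewer note: because parse now works on &str, SourceFile::new can compute the root from the borrowed source before moving it into the struct, dropping the placeholder root and the post-construction patch, while ast() simply surfaces the errors already collected on the red tree. A sketch of that parse-before-construct pattern with hypothetical stand-in types (Tree is not the crate's type):

use std::rc::Rc;

// Hypothetical stand-in for the parsed green tree.
struct Tree;

fn parse(_src: &str) -> Rc<Tree> {
    Rc::new(Tree)
}

struct SourceFile {
    src: String,
    root: Rc<Tree>,
}

impl SourceFile {
    fn new(src: String) -> Self {
        // The borrow taken for parsing ends before `src` is moved into the
        // struct, so no dummy root and no later mutation are needed.
        Self { root: parse(&src), src }
    }
}

fn main() {
    let _file = SourceFile::new("= Heading".into());
}
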
View File

@@ -1,8 +1,39 @@
use super::{Ident, NodeKind, RedNode, RedRef, Span, TypedNode};
use crate::geom::{AngularUnit, LengthUnit};
use crate::node;
use crate::util::EcoString;
macro_rules! node {
($(#[$attr:meta])* $name:ident) => {
node!{$(#[$attr])* $name => $name}
};
($(#[$attr:meta])* $variant:ident => $name:ident) => {
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
$(#[$attr])*
pub struct $name(RedNode);
impl TypedNode for $name {
fn cast_from(node: RedRef) -> Option<Self> {
if node.kind() != &NodeKind::$variant {
return None;
}
Some(Self(node.own()))
}
}
impl $name {
pub fn span(&self) -> Span {
self.0.span()
}
pub fn underlying(&self) -> RedRef {
self.0.as_ref()
}
}
};
}
node! {
/// The syntactical root capable of representing a full parsed document.
Markup

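Reviewer note: moving the node! macro here keeps the typed-node boilerplate next to the AST types it generates. Inside syntax/ast.rs, an invocation like node! { Markup } expands to roughly the following (a sketch based on the macro body above, eliding the doc attribute):

#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct Markup(RedNode);

impl TypedNode for Markup {
    fn cast_from(node: RedRef) -> Option<Self> {
        // Only a node of the matching kind can be wrapped.
        if node.kind() != &NodeKind::Markup {
            return None;
        }
        Some(Self(node.own()))
    }
}

impl Markup {
    pub fn span(&self) -> Span {
        self.0.span()
    }

    pub fn underlying(&self) -> RedRef {
        self.0.as_ref()
    }
}
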
View File

@@ -15,6 +15,7 @@ pub use ident::*;
pub use pretty::*;
pub use span::*;
use crate::diag::Error;
use crate::geom::{AngularUnit, LengthUnit};
use crate::source::SourceId;
use crate::util::EcoString;
@@ -94,9 +95,9 @@ impl GreenNode {
}
pub fn with_children(kind: NodeKind, len: usize, children: Vec<Green>) -> Self {
let mut meta = GreenData::new(kind, len);
meta.erroneous |= children.iter().any(|c| c.erroneous());
Self { data: meta, children }
let mut data = GreenData::new(kind, len);
data.erroneous |= children.iter().any(|c| c.erroneous());
Self { data, children }
}
pub fn with_child(kind: NodeKind, len: usize, child: impl Into<Green>) -> Self {
@@ -180,6 +181,10 @@ impl<'a> RedRef<'a> {
Span::new(self.id, self.offset, self.offset + self.green.len())
}
pub fn len(&self) -> usize {
self.green.len()
}
pub fn cast<T>(self) -> Option<T>
where
T: TypedNode,
@@ -205,6 +210,29 @@ impl<'a> RedRef<'a> {
})
}
pub fn errors(&self) -> Vec<Error> {
if !self.green.erroneous() {
return vec![];
}
match self.kind() {
NodeKind::Error(pos, msg) => {
let span = match pos {
ErrorPosition::Start => self.span().at_start(),
ErrorPosition::Full => self.span(),
ErrorPosition::End => self.span().at_end(),
};
vec![Error::new(span, msg.to_string())]
}
_ => self
.children()
.filter(|red| red.green.erroneous())
.flat_map(|red| red.errors())
.collect(),
}
}
pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedRef> {
self.children()
.find(|x| mem::discriminant(x.kind()) == mem::discriminant(kind))
@@ -219,6 +247,18 @@ impl<'a> RedRef<'a> {
}
}
impl Debug for RedRef<'_> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{:?}: {:?}", self.kind(), self.span())?;
let mut children = self.children().peekable();
if children.peek().is_some() {
f.write_str(" ")?;
f.debug_list().entries(children.map(RedRef::own)).finish()?;
}
Ok(())
}
}
#[derive(Clone, PartialEq)]
pub struct RedNode {
id: SourceId,
@@ -231,12 +271,27 @@ impl RedNode {
Self { id, offset: 0, green: root.into() }
}
pub fn as_ref<'a>(&'a self) -> RedRef<'a> {
RedRef {
id: self.id,
offset: self.offset,
green: &self.green,
}
}
pub fn span(&self) -> Span {
self.as_ref().span()
}
pub fn len(&self) -> usize {
self.green.len()
self.as_ref().len()
}
pub fn cast<T>(self) -> Option<T>
where
T: TypedNode,
{
T::cast_from(self.as_ref())
}
pub fn kind(&self) -> &NodeKind {
@@ -247,36 +302,8 @@ impl RedNode {
self.as_ref().children()
}
pub fn errors(&self) -> Vec<(Span, EcoString)> {
if !self.green.erroneous() {
return vec![];
}
match self.kind() {
NodeKind::Error(pos, msg) => {
let span = match pos {
ErrorPosition::Start => self.span().at_start(),
ErrorPosition::Full => self.span(),
ErrorPosition::End => self.span().at_end(),
};
vec![(span, msg.clone())]
}
_ => self
.as_ref()
.children()
.filter(|red| red.green.erroneous())
.flat_map(|red| red.own().errors())
.collect(),
}
}
pub fn as_ref<'a>(&'a self) -> RedRef<'a> {
RedRef {
id: self.id,
offset: self.offset,
green: &self.green,
}
pub fn errors<'a>(&'a self) -> Vec<Error> {
self.as_ref().errors()
}
pub(crate) fn typed_child(&self, kind: &NodeKind) -> Option<RedNode> {
@@ -294,15 +321,7 @@ impl RedNode {
impl Debug for RedNode {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{:?}: {:?}", self.kind(), self.span())?;
let children = self.as_ref().children().collect::<Vec<_>>();
if !children.is_empty() {
f.write_str(" ")?;
f.debug_list()
.entries(children.into_iter().map(RedRef::own))
.finish()?;
}
Ok(())
self.as_ref().fmt(f)
}
}
@@ -419,7 +438,7 @@ pub enum NodeKind {
EmDash,
/// A slash and the letter "u" followed by a hexadecimal unicode entity
/// enclosed in curly braces: `\u{1F5FA}`.
UnicodeEscape(UnicodeEscapeToken),
UnicodeEscape(UnicodeEscapeData),
/// Strong text was enabled / disabled: `*`.
Strong,
/// Emphasized text was enabled / disabled: `_`.
@@ -440,9 +459,9 @@ pub enum NodeKind {
ListBullet,
/// An arbitrary number of backticks followed by inner contents, terminated
/// with the same number of backticks: `` `...` ``.
Raw(Rc<RawToken>),
Raw(Rc<RawData>),
/// Dollar signs surrounding inner contents.
Math(Rc<MathToken>),
Math(Rc<MathData>),
/// An identifier: `center`.
Ident(EcoString),
/// A boolean: `true`, `false`.
@@ -463,7 +482,7 @@ pub enum NodeKind {
/// A fraction unit: `3fr`.
Fraction(f64),
/// A quoted string: `"..."`.
Str(StrToken),
Str(StrData),
/// An array expression: `(1, "hi", 12cm)`.
Array,
/// A dictionary expression: `(thickness: 3pt, pattern: dashed)`.
@@ -534,15 +553,14 @@ pub enum ErrorPosition {
/// A quoted string token: `"..."`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct StrToken {
pub struct StrData {
/// The string inside the quotes.
pub string: EcoString,
}
/// A raw block token: `` `...` ``.
#[derive(Debug, Clone, PartialEq)]
pub struct RawToken {
pub struct RawData {
/// The raw text in the block.
pub text: EcoString,
/// The programming language of the raw text.
@@ -555,7 +573,7 @@ pub struct RawToken {
/// A math formula token: `$2pi + x$` or `$[f'(x) = x^2]$`.
#[derive(Debug, Clone, PartialEq)]
pub struct MathToken {
pub struct MathData {
/// The formula between the dollars.
pub formula: EcoString,
/// Whether the formula is display-level, that is, it is surrounded by
@@ -565,8 +583,7 @@ pub struct MathToken {
/// A unicode escape sequence token: `\u{1F5FA}`.
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
pub struct UnicodeEscapeToken {
pub struct UnicodeEscapeData {
/// The resulting unicode character.
pub character: char,
}
@@ -712,36 +729,3 @@ impl NodeKind {
}
}
}
#[macro_export]
macro_rules! node {
($(#[$attr:meta])* $name:ident) => {
node!{$(#[$attr])* $name => $name}
};
($(#[$attr:meta])* $variant:ident => $name:ident) => {
#[derive(Debug, Clone, PartialEq)]
#[repr(transparent)]
$(#[$attr])*
pub struct $name(RedNode);
impl TypedNode for $name {
fn cast_from(node: RedRef) -> Option<Self> {
if node.kind() != &NodeKind::$variant {
return None;
}
Some(Self(node.own()))
}
}
impl $name {
pub fn span(&self) -> Span {
self.0.span()
}
pub fn underlying(&self) -> RedRef {
self.0.as_ref()
}
}
};
}
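
Reviewer note: the error walk and the Debug formatting now live on RedRef, with RedNode::len, RedNode::errors, and RedNode's Debug impl reduced to thin delegations through as_ref(). A self-contained sketch of that owner-delegates-to-borrowed-view pattern; ViewNode and ViewRef are hypothetical stand-ins, not the crate's types:

use std::fmt::{self, Debug, Formatter};

// Borrowed view that carries the real logic, like `RedRef`.
#[derive(Clone, Copy)]
struct ViewRef<'a> {
    items: &'a [u32],
}

impl<'a> ViewRef<'a> {
    fn len(&self) -> usize {
        self.items.len()
    }
}

impl Debug for ViewRef<'_> {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "view of {} items", self.len())
    }
}

// Owning node that forwards everything to the view, like `RedNode`.
struct ViewNode {
    items: Vec<u32>,
}

impl ViewNode {
    fn as_ref(&self) -> ViewRef<'_> {
        ViewRef { items: &self.items }
    }

    fn len(&self) -> usize {
        self.as_ref().len()
    }
}

impl Debug for ViewNode {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Same shape as the new `RedNode` Debug impl: just delegate.
        self.as_ref().fmt(f)
    }
}

fn main() {
    let node = ViewNode { items: vec![1, 2, 3] };
    println!("{:?} ({} items)", node, node.len());
}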