Bytes type

- Moves `Bytes` from `util` to `eval` module
- Accepts bytes in `str` function for bytes -> str conversion
- Adds `bytes` function for str | array -> bytes conversion
- Adds `array` function for bytes -> array conversion
- Adds `len`, `at`, and `slice` methods for bytes
- Adds `encoding` parameter to `read` function
This commit is contained in:
Laurenz 2023-08-04 15:09:01 +02:00
parent 028d2f5308
commit b8b0137504
33 changed files with 489 additions and 229 deletions

View File

@ -5,8 +5,8 @@ use std::path::{Path, PathBuf};
use memmap2::Mmap; use memmap2::Mmap;
use typst::diag::StrResult; use typst::diag::StrResult;
use typst::eval::Bytes;
use typst::font::{Font, FontBook, FontInfo, FontVariant}; use typst::font::{Font, FontBook, FontInfo, FontVariant};
use typst::util::Bytes;
use walkdir::WalkDir; use walkdir::WalkDir;
use crate::args::FontsCommand; use crate::args::FontsCommand;

View File

@ -9,10 +9,10 @@ use comemo::Prehashed;
use same_file::Handle; use same_file::Handle;
use siphasher::sip128::{Hasher128, SipHasher13}; use siphasher::sip128::{Hasher128, SipHasher13};
use typst::diag::{FileError, FileResult, StrResult}; use typst::diag::{FileError, FileResult, StrResult};
use typst::eval::{eco_format, Datetime, Library}; use typst::eval::{eco_format, Bytes, Datetime, Library};
use typst::font::{Font, FontBook}; use typst::font::{Font, FontBook};
use typst::syntax::{FileId, Source}; use typst::syntax::{FileId, Source};
use typst::util::{Bytes, PathExt}; use typst::util::PathExt;
use typst::World; use typst::World;
use crate::args::CompileCommand; use crate::args::CompileCommand;

View File

@ -4,11 +4,10 @@ use comemo::Prehashed;
use pulldown_cmark as md; use pulldown_cmark as md;
use typed_arena::Arena; use typed_arena::Arena;
use typst::diag::FileResult; use typst::diag::FileResult;
use typst::eval::{Datetime, Tracer}; use typst::eval::{Bytes, Datetime, Tracer};
use typst::font::{Font, FontBook}; use typst::font::{Font, FontBook};
use typst::geom::{Point, Size}; use typst::geom::{Point, Size};
use typst::syntax::{FileId, Source}; use typst::syntax::{FileId, Source};
use typst::util::Bytes;
use typst::World; use typst::World;
use yaml_front_matter::YamlFrontMatter; use yaml_front_matter::YamlFrontMatter;

View File

@ -983,6 +983,7 @@ const TYPE_ORDER: &[&str] = &[
"color", "color",
"datetime", "datetime",
"string", "string",
"bytes",
"regex", "regex",
"label", "label",
"content", "content",

View File

@ -3,7 +3,7 @@ use std::str::FromStr;
use time::{Month, PrimitiveDateTime}; use time::{Month, PrimitiveDateTime};
use typst::eval::{Datetime, Module, Regex}; use typst::eval::{Bytes, Datetime, Module, Reflect, Regex};
use crate::prelude::*; use crate::prelude::*;
@ -37,9 +37,9 @@ pub struct ToInt(i64);
cast! { cast! {
ToInt, ToInt,
v: bool => Self(v as i64), v: bool => Self(v as i64),
v: i64 => Self(v),
v: f64 => Self(v as i64), v: f64 => Self(v as i64),
v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?), v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?),
v: i64 => Self(v),
} }
/// Converts a value to a float. /// Converts a value to a float.
@ -77,9 +77,9 @@ cast! {
ToFloat, ToFloat,
v: bool => Self(v as i64 as f64), v: bool => Self(v as i64 as f64),
v: i64 => Self(v as f64), v: i64 => Self(v as f64),
v: f64 => Self(v),
v: Ratio => Self(v.get()), v: Ratio => Self(v.get()),
v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?), v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?),
v: f64 => Self(v),
} }
/// Creates a grayscale color. /// Creates a grayscale color.
@ -486,6 +486,7 @@ cast! {
/// optional `base` parameter. /// optional `base` parameter.
/// - Floats are formatted in base 10 and never in exponential notation. /// - Floats are formatted in base 10 and never in exponential notation.
/// - From labels the name is extracted. /// - From labels the name is extracted.
/// - Bytes are decoded as UTF-8.
/// ///
/// If you wish to convert from and to Unicode code points, see /// If you wish to convert from and to Unicode code points, see
/// [`str.to-unicode`]($func/str.to-unicode) and /// [`str.to-unicode`]($func/str.to-unicode) and
@ -545,6 +546,11 @@ cast! {
v: i64 => Self::Int(v), v: i64 => Self::Int(v),
v: f64 => Self::Str(format_str!("{}", v)), v: f64 => Self::Str(format_str!("{}", v)),
v: Label => Self::Str(v.0.into()), v: Label => Self::Str(v.0.into()),
v: Bytes => Self::Str(
std::str::from_utf8(&v)
.map_err(|_| "bytes are not valid utf-8")?
.into()
),
v: Str => Self::Str(v), v: Str => Self::Str(v),
} }
@ -633,35 +639,6 @@ cast! {
}, },
} }
/// Creates a label from a string.
///
/// Inserting a label into content attaches it to the closest previous element
/// that is not a space. Then, the element can be [referenced]($func/ref) and
/// styled through the label.
///
/// ## Example { #example }
/// ```example
/// #show <a>: set text(blue)
/// #show label("b"): set text(red)
///
/// = Heading <a>
/// *Strong* #label("b")
/// ```
///
/// ## Syntax { #syntax }
/// This function also has dedicated syntax: You can create a label by enclosing
/// its name in angle brackets. This works both in markup and code.
///
/// Display: Label
/// Category: construct
#[func]
pub fn label(
/// The name of the label.
name: EcoString,
) -> Label {
Label(name)
}
/// Creates a regular expression from a string. /// Creates a regular expression from a string.
/// ///
/// The result can be used as a /// The result can be used as a
@ -701,6 +678,106 @@ pub fn regex(
Regex::new(&regex.v).at(regex.span) Regex::new(&regex.v).at(regex.span)
} }
/// Converts a value to bytes.
///
/// - Strings are encoded in UTF-8.
/// - Arrays of integers between `{0}` and `{255}` are converted directly. The
///   dedicated byte representation is much more efficient than the array
///   representation and thus typically used for large byte buffers (e.g. image
///   data).
///
/// ```example
/// #bytes("Hello 😃") \
/// #bytes((123, 160, 22, 0))
/// ```
///
/// Display: Bytes
/// Category: construct
#[func]
pub fn bytes(
    /// The value that should be converted to bytes.
    value: ToBytes,
) -> Bytes {
    value.0
}
/// A value that can be cast to bytes.
pub struct ToBytes(Bytes);
cast! {
ToBytes,
// Strings contribute the bytes of their UTF-8 representation.
v: Str => Self(v.as_bytes().into()),
// Arrays must consist solely of integers in `0..=255`. Collecting into a
// `Result` stops at the first element that is out of range or not an
// integer and reports that element's error.
v: Array => Self(v.iter()
.map(|v| match v {
Value::Int(byte @ 0..=255) => Ok(*byte as u8),
Value::Int(_) => bail!("number must be between 0 and 255"),
value => Err(<u8 as Reflect>::error(value)),
})
.collect::<Result<Vec<u8>, _>>()?
.into()
),
// Existing bytes pass through unchanged.
v: Bytes => Self(v),
}
/// Creates a label from a string.
///
/// Inserting a label into content attaches it to the closest previous element
/// that is not a space. Then, the element can be [referenced]($func/ref) and
/// styled through the label.
///
/// ## Example { #example }
/// ```example
/// #show <a>: set text(blue)
/// #show label("b"): set text(red)
///
/// = Heading <a>
/// *Strong* #label("b")
/// ```
///
/// ## Syntax { #syntax }
/// This function also has dedicated syntax: You can create a label by enclosing
/// its name in angle brackets. This works both in markup and code.
///
/// Display: Label
/// Category: construct
#[func]
pub fn label(
/// The name of the label.
name: EcoString,
) -> Label {
Label(name)
}
/// Converts a value to an array.
///
/// Note that this function is only intended for conversion of a collection-like
/// value to an array, not for creation of an array from individual items. Use
/// the array syntax `(1, 2, 3)` (or `(1,)` for a single-element array) instead.
///
/// ```example
/// #let hi = "Hello 😃"
/// #array(bytes(hi))
/// ```
///
/// Display: Array
/// Category: construct
#[func]
pub fn array(
/// The value that should be converted to an array.
value: ToArray,
) -> Array {
value.0
}
/// A value that can be cast to an array.
pub struct ToArray(Array);

cast! {
    ToArray,
    // Each byte becomes one integer value; `u8 -> i64` is lossless.
    v: Bytes => Self(v.iter().map(|&b| Value::Int(i64::from(b))).collect()),
    // Existing arrays pass through unchanged.
    v: Array => Self(v),
}
/// Creates an array consisting of consecutive integers. /// Creates an array consisting of consecutive integers.
/// ///
/// If you pass just one positional parameter, it is interpreted as the `end` of /// If you pass just one positional parameter, it is interpreted as the `end` of

View File

@ -1,18 +1,24 @@
use typst::diag::{format_xml_like_error, FileError}; use typst::diag::{format_xml_like_error, FileError};
use typst::eval::Datetime; use typst::eval::{Bytes, Datetime};
use crate::prelude::*; use crate::prelude::*;
/// Reads plain text from a file. /// Reads plain text or data from a file.
/// ///
/// The file will be read and returned as a string. /// By default, the file will be read as UTF-8 and returned as a
/// [string]($type/string).
///
/// If you specify `{encoding: none}`, this returns raw [bytes]($type/bytes)
/// instead.
/// ///
/// ## Example { #example } /// ## Example { #example }
/// ```example /// ```example
/// An example for a HTML file: \
/// #let text = read("data.html") /// #let text = read("data.html")
///
/// An example for a HTML file:\
/// #raw(text, lang: "html") /// #raw(text, lang: "html")
///
/// Raw bytes:
/// #read("tiger.jpg", encoding: none)
/// ``` /// ```
/// ///
/// Display: Read /// Display: Read
@ -21,16 +27,52 @@ use crate::prelude::*;
pub fn read( pub fn read(
/// Path to a file. /// Path to a file.
path: Spanned<EcoString>, path: Spanned<EcoString>,
/// The encoding to read the file with.
///
/// If set to `{none}`, this function returns raw bytes.
#[named]
#[default(Some(Encoding::Utf8))]
encoding: Option<Encoding>,
/// The virtual machine. /// The virtual machine.
vm: &mut Vm, vm: &mut Vm,
) -> SourceResult<Str> { ) -> SourceResult<Readable> {
let Spanned { v: path, span } = path; let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?; let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?; let data = vm.world().file(id).at(span)?;
let text = std::str::from_utf8(&data) Ok(match encoding {
.map_err(|_| "file is not valid utf-8") None => Readable::Bytes(data),
.at(span)?; Some(Encoding::Utf8) => Readable::Str(
Ok(text.into()) std::str::from_utf8(&data)
.map_err(|_| "file is not valid utf-8")
.at(span)?
.into(),
),
})
}
/// An encoding of a file.
// UTF-8 is the only variant. Reading raw bytes is not modelled as a variant:
// `read` takes an `Option<Encoding>` and treats `None` as "return bytes".
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum Encoding {
/// The Unicode UTF-8 encoding.
Utf8,
}
/// A value that can be read from a file.
///
/// `read` returns the `Str` variant when an encoding is given and the `Bytes`
/// variant when the encoding is `none`.
pub enum Readable {
    /// A decoded string.
    Str(Str),
    /// Raw bytes.
    Bytes(Bytes),
}
cast! {
Readable,
self => match self {
Self::Str(v) => v.into_value(),
Self::Bytes(v) => v.into_value(),
},
v: Str => Self::Str(v),
v: Bytes => Self::Bytes(v),
} }
/// Reads structured data from a CSV file. /// Reads structured data from a CSV file.

View File

@ -27,8 +27,10 @@ pub(super) fn define(global: &mut Scope) {
global.define("datetime", datetime_func()); global.define("datetime", datetime_func());
global.define("symbol", symbol_func()); global.define("symbol", symbol_func());
global.define("str", str_func()); global.define("str", str_func());
global.define("bytes", bytes_func());
global.define("label", label_func()); global.define("label", label_func());
global.define("regex", regex_func()); global.define("regex", regex_func());
global.define("array", array_func());
global.define("range", range_func()); global.define("range", range_func());
global.define("read", read_func()); global.define("read", read_func());
global.define("csv", csv_func()); global.define("csv", csv_func());

View File

@ -8,7 +8,8 @@ use hayagriva::io::{BibLaTeXError, YamlBibliographyError};
use hayagriva::style::{self, Brackets, Citation, Database, DisplayString, Formatting}; use hayagriva::style::{self, Brackets, Citation, Database, DisplayString, Formatting};
use hayagriva::Entry; use hayagriva::Entry;
use typst::diag::FileError; use typst::diag::FileError;
use typst::util::{option_eq, Bytes}; use typst::eval::Bytes;
use typst::util::option_eq;
use super::{LinkElem, LocalName, RefElem}; use super::{LinkElem, LocalName, RefElem};
use crate::layout::{BlockElem, GridElem, ParElem, Sizing, TrackSizings, VElem}; use crate::layout::{BlockElem, GridElem, ParElem, Sizing, TrackSizings, VElem};

View File

@ -6,8 +6,8 @@ use once_cell::unsync::Lazy as UnsyncLazy;
use syntect::highlighting as synt; use syntect::highlighting as synt;
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst::diag::FileError; use typst::diag::FileError;
use typst::eval::Bytes;
use typst::syntax::{self, LinkedNode}; use typst::syntax::{self, LinkedNode};
use typst::util::Bytes;
use super::{ use super::{
FontFamily, FontList, Hyphenate, LinebreakElem, SmartQuoteElem, TextElem, TextSize, FontFamily, FontList, Hyphenate, LinebreakElem, SmartQuoteElem, TextElem, TextSize,

View File

@ -1,8 +1,8 @@
use std::ffi::OsStr; use std::ffi::OsStr;
use std::path::Path; use std::path::Path;
use typst::eval::Bytes;
use typst::image::{Image, ImageFormat, RasterFormat, VectorFormat}; use typst::image::{Image, ImageFormat, RasterFormat, VectorFormat};
use typst::util::Bytes;
use crate::meta::{Figurable, LocalName}; use crate::meta::{Figurable, LocalName};
use crate::prelude::*; use crate::prelude::*;

View File

@ -74,13 +74,9 @@ impl Array {
} }
/// Borrow the value at the given index. /// Borrow the value at the given index.
pub fn at<'a>( pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
&'a self, self.locate_opt(index, false)
index: i64, .and_then(|i| self.0.get(i).cloned())
default: Option<&'a Value>,
) -> StrResult<&'a Value> {
self.locate(index)
.and_then(|i| self.0.get(i))
.or(default) .or(default)
.ok_or_else(|| out_of_bounds_no_default(index, self.len())) .ok_or_else(|| out_of_bounds_no_default(index, self.len()))
} }
@ -88,7 +84,7 @@ impl Array {
/// Mutably borrow the value at the given index. /// Mutably borrow the value at the given index.
pub fn at_mut(&mut self, index: i64) -> StrResult<&mut Value> { pub fn at_mut(&mut self, index: i64) -> StrResult<&mut Value> {
let len = self.len(); let len = self.len();
self.locate(index) self.locate_opt(index, false)
.and_then(move |i| self.0.make_mut().get_mut(i)) .and_then(move |i| self.0.make_mut().get_mut(i))
.ok_or_else(|| out_of_bounds_no_default(index, len)) .ok_or_else(|| out_of_bounds_no_default(index, len))
} }
@ -105,42 +101,21 @@ impl Array {
/// Insert a value at the specified index. /// Insert a value at the specified index.
pub fn insert(&mut self, index: i64, value: Value) -> StrResult<()> { pub fn insert(&mut self, index: i64, value: Value) -> StrResult<()> {
let len = self.len(); let i = self.locate(index, true)?;
let i = self
.locate(index)
.filter(|&i| i <= self.0.len())
.ok_or_else(|| out_of_bounds(index, len))?;
self.0.insert(i, value); self.0.insert(i, value);
Ok(()) Ok(())
} }
/// Remove and return the value at the specified index. /// Remove and return the value at the specified index.
pub fn remove(&mut self, index: i64) -> StrResult<Value> { pub fn remove(&mut self, index: i64) -> StrResult<Value> {
let len = self.len(); let i = self.locate(index, false)?;
let i = self
.locate(index)
.filter(|&i| i < self.0.len())
.ok_or_else(|| out_of_bounds(index, len))?;
Ok(self.0.remove(i)) Ok(self.0.remove(i))
} }
/// Extract a contiguous subregion of the array. /// Extract a contiguous subregion of the array.
pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> { pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> {
let len = self.len(); let start = self.locate(start, true)?;
let start = self let end = self.locate(end.unwrap_or(self.len() as i64), true)?.max(start);
.locate(start)
.filter(|&start| start <= self.0.len())
.ok_or_else(|| out_of_bounds(start, len))?;
let end = end.unwrap_or(self.len() as i64);
let end = self
.locate(end)
.filter(|&end| end <= self.0.len())
.ok_or_else(|| out_of_bounds(end, len))?
.max(start);
Ok(self.0[start..end].into()) Ok(self.0[start..end].into())
} }
@ -371,26 +346,6 @@ impl Array {
Ok(self.iter().cloned().cycle().take(count).collect()) Ok(self.iter().cloned().cycle().take(count).collect())
} }
/// Extract a slice of the whole array.
pub fn as_slice(&self) -> &[Value] {
self.0.as_slice()
}
/// Iterate over references to the contained values.
pub fn iter(&self) -> std::slice::Iter<Value> {
self.0.iter()
}
/// Resolve an index.
fn locate(&self, index: i64) -> Option<usize> {
usize::try_from(if index >= 0 {
index
} else {
(self.len() as i64).checked_add(index)?
})
.ok()
}
/// Enumerate all items in the array. /// Enumerate all items in the array.
pub fn enumerate(&self, start: i64) -> StrResult<Self> { pub fn enumerate(&self, start: i64) -> StrResult<Self> {
self.iter() self.iter()
@ -438,11 +393,44 @@ impl Array {
Ok(Self(out)) Ok(Self(out))
} }
/// Extract a slice of the whole array.
pub fn as_slice(&self) -> &[Value] {
self.0.as_slice()
}
/// Iterate over references to the contained values.
pub fn iter(&self) -> std::slice::Iter<Value> {
self.0.iter()
}
/// Resolve an index, turning an unresolvable one into an out of bounds error.
///
/// See `locate_opt` for how negative indices and `end_ok` are handled.
fn locate(&self, index: i64, end_ok: bool) -> StrResult<usize> {
    match self.locate_opt(index, end_ok) {
        Some(resolved) => Ok(resolved),
        None => Err(out_of_bounds(index, self.len())),
    }
}
/// Resolve an index, if it is within bounds.
///
/// Negative indices are counted from the end of the array. `index == len` is
/// considered in bounds if and only if `end_ok` is true.
fn locate_opt(&self, index: i64, end_ok: bool) -> Option<usize> {
    // Wrap negative indices around; give up if the addition overflows.
    let wrapped = if index >= 0 {
        index
    } else {
        (self.len() as i64).checked_add(index)?
    };

    // Still negative after wrapping means out of bounds.
    let resolved = usize::try_from(wrapped).ok()?;

    // One past the last element is allowed only when `end_ok` is set.
    let limit = self.0.len() + end_ok as usize;
    if resolved < limit {
        Some(resolved)
    } else {
        None
    }
}
} }
impl Debug for Array { impl Debug for Array {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let pieces: Vec<_> = self.iter().map(|value| eco_format!("{value:?}")).collect(); let max = 40;
let mut pieces: Vec<_> =
self.iter().take(max).map(|value| eco_format!("{value:?}")).collect();
if self.len() > max {
pieces.push(eco_format!(".. ({} items omitted)", self.len() - max));
}
f.write_str(&pretty_array_like(&pieces, self.len() == 1)) f.write_str(&pretty_array_like(&pieces, self.len() == 1))
} }
} }

View File

@ -0,0 +1,111 @@
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::sync::Arc;
use comemo::Prehashed;
use ecow::{eco_format, EcoString};
use crate::diag::StrResult;
use super::Value;
/// A shared byte buffer that is cheap to clone and hash.
// Layering: `Cow` lets the buffer either borrow static data (see
// `from_static`) or own a vector (see the `From` impls); `Arc` shares the
// buffer between clones.
// NOTE(review): `Prehashed` presumably caches the hash of the contents, which
// is what makes hashing cheap — confirm against comemo's documentation.
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct Bytes(Arc<Prehashed<Cow<'static, [u8]>>>);
impl Bytes {
    /// Wrap a static byte slice without copying it.
    pub fn from_static(slice: &'static [u8]) -> Self {
        let borrowed = Cow::Borrowed(slice);
        Self(Arc::new(Prehashed::new(borrowed)))
    }

    /// Get the byte at the given index as an integer value.
    ///
    /// Negative indices count from the end. If there is no byte at the index,
    /// `default` is returned when given; otherwise this fails with an out of
    /// bounds error.
    pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
        // `locate_opt` also accepts `index == len`; `get` then yields `None`,
        // so that case falls through to the default like any other miss.
        let stored = match self.locate_opt(index) {
            Some(i) => self.0.get(i).map(|&byte| Value::Int(byte as i64)),
            None => None,
        };

        match stored.or(default) {
            Some(value) => Ok(value),
            None => Err(out_of_bounds_no_default(index, self.len())),
        }
    }

    /// Extract a contiguous subregion of the bytes.
    ///
    /// A missing `end` means "up to the end of the buffer". An `end` that
    /// lies before `start` is clamped to `start`, giving an empty result.
    pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> {
        let from = self.locate(start)?;
        let requested_end = end.unwrap_or(self.len() as i64);
        let to = self.locate(requested_end)?.max(from);
        Ok(self.0[from..to].into())
    }

    /// Return a view into the buffer.
    pub fn as_slice(&self) -> &[u8] {
        &self.0[..]
    }

    /// Return a copy of the buffer as a vector.
    pub fn to_vec(&self) -> Vec<u8> {
        self.as_slice().to_vec()
    }

    /// Resolve an index or throw an out of bounds error.
    fn locate(&self, index: i64) -> StrResult<usize> {
        match self.locate_opt(index) {
            Some(resolved) => Ok(resolved),
            None => Err(out_of_bounds(index, self.len())),
        }
    }

    /// Resolve an index, if it is within bounds.
    ///
    /// Negative indices count from the end. `index == len` is considered in
    /// bounds.
    fn locate_opt(&self, index: i64) -> Option<usize> {
        // Wrap negative indices around; give up if the addition overflows.
        let wrapped = if index >= 0 {
            index
        } else {
            (self.len() as i64).checked_add(index)?
        };

        // Still negative after wrapping means out of bounds.
        let resolved = usize::try_from(wrapped).ok()?;
        if resolved <= self.0.len() {
            Some(resolved)
        } else {
            None
        }
    }
}
impl From<&[u8]> for Bytes {
fn from(slice: &[u8]) -> Self {
Self(Arc::new(Prehashed::new(slice.to_vec().into())))
}
}
impl From<Vec<u8>> for Bytes {
fn from(vec: Vec<u8>) -> Self {
Self(Arc::new(Prehashed::new(vec.into())))
}
}
// Dereferencing to `[u8]` makes the slice methods (`len`, `iter`, `get`, ...)
// available directly on `Bytes`.
impl Deref for Bytes {
type Target = [u8];
fn deref(&self) -> &Self::Target {
// Deref coercion unwraps the inner layers down to the byte slice.
&self.0
}
}
impl AsRef<[u8]> for Bytes {
    /// Borrow the buffer as a plain byte slice.
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}
impl Debug for Bytes {
    /// Print only the length of the buffer, never its contents.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let len = self.len();
        write!(f, "bytes({})", len)
    }
}
/// The out of bounds access error message.
///
/// Built by `Bytes::locate` when an index cannot be resolved; `index` is the
/// index as the caller gave it (possibly negative) and `len` the buffer length.
#[cold]
fn out_of_bounds(index: i64, len: usize) -> EcoString {
eco_format!("byte index out of bounds (index: {index}, len: {len})")
}
/// The out of bounds access error message when no default value was given.
///
/// Built by `Bytes::at` when there is no byte at the index and the caller
/// supplied no default.
#[cold]
fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString {
eco_format!(
"byte index out of bounds (index: {index}, len: {len}) \
and no default value was specified",
)
}

View File

@ -49,12 +49,12 @@ impl Dict {
} }
/// Borrow the value the given `key` maps to, /// Borrow the value the given `key` maps to,
pub fn at<'a>( pub fn at(&self, key: &str, default: Option<Value>) -> StrResult<Value> {
&'a self, self.0
key: &str, .get(key)
default: Option<&'a Value>, .cloned()
) -> StrResult<&'a Value> { .or(default)
self.0.get(key).or(default).ok_or_else(|| missing_key_no_default(key)) .ok_or_else(|| missing_key_no_default(key))
} }
/// Mutably borrow the value the given `key` maps to. /// Mutably borrow the value the given `key` maps to.
@ -140,8 +140,10 @@ impl Debug for Dict {
return f.write_str("(:)"); return f.write_str("(:)");
} }
let pieces: Vec<_> = self let max = 40;
let mut pieces: Vec<_> = self
.iter() .iter()
.take(max)
.map(|(key, value)| { .map(|(key, value)| {
if is_ident(key) { if is_ident(key) {
eco_format!("{key}: {value:?}") eco_format!("{key}: {value:?}")
@ -151,6 +153,10 @@ impl Debug for Dict {
}) })
.collect(); .collect();
if self.len() > max {
pieces.push(eco_format!(".. ({} pairs omitted)", self.len() - max));
}
f.write_str(&pretty_array_like(&pieces, false)) f.write_str(&pretty_array_like(&pieces, false))
} }
} }

View File

@ -55,11 +55,10 @@ pub fn call(
"len" => string.len().into_value(), "len" => string.len().into_value(),
"first" => string.first().at(span)?.into_value(), "first" => string.first().at(span)?.into_value(),
"last" => string.last().at(span)?.into_value(), "last" => string.last().at(span)?.into_value(),
"at" => { "at" => string
let index = args.expect("index")?; .at(args.expect("index")?, args.named("default")?)
let default = args.named::<EcoString>("default")?; .at(span)?
string.at(index, default.as_deref()).at(span)?.into_value() .into_value(),
}
"slice" => { "slice" => {
let start = args.expect("start")?; let start = args.expect("start")?;
let mut end = args.eat()?; let mut end = args.eat()?;
@ -93,11 +92,25 @@ pub fn call(
_ => return missing(), _ => return missing(),
}, },
Value::Bytes(bytes) => match method {
"len" => bytes.len().into_value(),
"at" => bytes.at(args.expect("index")?, args.named("default")?).at(span)?,
"slice" => {
let start = args.expect("start")?;
let mut end = args.eat()?;
if end.is_none() {
end = args.named("count")?.map(|c: i64| start + c);
}
bytes.slice(start, end).at(span)?.into_value()
}
_ => return missing(),
},
Value::Content(content) => match method { Value::Content(content) => match method {
"func" => content.func().into_value(), "func" => content.func().into_value(),
"has" => content.has(&args.expect::<EcoString>("field")?).into_value(), "has" => content.has(&args.expect::<EcoString>("field")?).into_value(),
"at" => content "at" => content
.at(&args.expect::<EcoString>("field")?, args.named("default")?) .at(&args.expect::<Str>("field")?, args.named("default")?)
.at(span)?, .at(span)?,
"fields" => content.dict().into_value(), "fields" => content.dict().into_value(),
"location" => content "location" => content
@ -112,10 +125,7 @@ pub fn call(
"len" => array.len().into_value(), "len" => array.len().into_value(),
"first" => array.first().at(span)?.clone(), "first" => array.first().at(span)?.clone(),
"last" => array.last().at(span)?.clone(), "last" => array.last().at(span)?.clone(),
"at" => array "at" => array.at(args.expect("index")?, args.named("default")?).at(span)?,
.at(args.expect("index")?, args.named("default")?.as_ref())
.at(span)?
.clone(),
"slice" => { "slice" => {
let start = args.expect("start")?; let start = args.expect("start")?;
let mut end = args.eat()?; let mut end = args.eat()?;
@ -157,9 +167,8 @@ pub fn call(
Value::Dict(dict) => match method { Value::Dict(dict) => match method {
"len" => dict.len().into_value(), "len" => dict.len().into_value(),
"at" => dict "at" => dict
.at(&args.expect::<Str>("key")?, args.named("default")?.as_ref()) .at(&args.expect::<Str>("key")?, args.named("default")?)
.at(span)? .at(span)?,
.clone(),
"keys" => dict.keys().into_value(), "keys" => dict.keys().into_value(),
"values" => dict.values().into_value(), "values" => dict.values().into_value(),
"pairs" => dict.pairs().into_value(), "pairs" => dict.pairs().into_value(),
@ -396,6 +405,7 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] {
("starts-with", true), ("starts-with", true),
("trim", true), ("trim", true),
], ],
"bytes" => &[("len", false), ("at", true), ("slice", true)],
"content" => &[ "content" => &[
("func", false), ("func", false),
("has", true), ("has", true),

View File

@ -14,6 +14,7 @@ mod str;
mod value; mod value;
mod args; mod args;
mod auto; mod auto;
mod bytes;
mod datetime; mod datetime;
mod fields; mod fields;
mod func; mod func;
@ -40,6 +41,7 @@ pub use typst_macros::{func, symbols};
pub use self::args::{Arg, Args}; pub use self::args::{Arg, Args};
pub use self::array::{array, Array}; pub use self::array::{array, Array};
pub use self::auto::AutoValue; pub use self::auto::AutoValue;
pub use self::bytes::Bytes;
pub use self::cast::{ pub use self::cast::{
cast, Cast, CastInfo, FromValue, IntoResult, IntoValue, Never, Reflect, Variadics, cast, Cast, CastInfo, FromValue, IntoResult, IntoValue, Never, Reflect, Variadics,
}; };
@ -1371,7 +1373,7 @@ where
let Ok(v) = value.at(i as i64, None) else { let Ok(v) = value.at(i as i64, None) else {
bail!(expr.span(), "not enough elements to destructure"); bail!(expr.span(), "not enough elements to destructure");
}; };
f(vm, expr, v.clone())?; f(vm, expr, v)?;
i += 1; i += 1;
} }
ast::DestructuringKind::Sink(spread) => { ast::DestructuringKind::Sink(spread) => {
@ -1423,7 +1425,7 @@ where
.at(&ident, None) .at(&ident, None)
.map_err(|_| "destructuring key not found in dictionary") .map_err(|_| "destructuring key not found in dictionary")
.at(ident.span())?; .at(ident.span())?;
f(vm, ast::Expr::Ident(ident.clone()), v.clone())?; f(vm, ast::Expr::Ident(ident.clone()), v)?;
used.insert(ident.take()); used.insert(ident.take());
} }
ast::DestructuringKind::Sink(spread) => sink = spread.expr(), ast::DestructuringKind::Sink(spread) => sink = spread.expr(),
@ -1433,7 +1435,7 @@ where
.at(&name, None) .at(&name, None)
.map_err(|_| "destructuring key not found in dictionary") .map_err(|_| "destructuring key not found in dictionary")
.at(name.span())?; .at(name.span())?;
f(vm, named.expr(), v.clone())?; f(vm, named.expr(), v)?;
used.insert(name.take()); used.insert(name.take());
} }
ast::DestructuringKind::Placeholder(_) => {} ast::DestructuringKind::Placeholder(_) => {}

View File

@ -347,6 +347,7 @@ pub fn equal(lhs: &Value, rhs: &Value) -> bool {
(Color(a), Color(b)) => a == b, (Color(a), Color(b)) => a == b,
(Symbol(a), Symbol(b)) => a == b, (Symbol(a), Symbol(b)) => a == b,
(Str(a), Str(b)) => a == b, (Str(a), Str(b)) => a == b,
(Bytes(a), Bytes(b)) => a == b,
(Label(a), Label(b)) => a == b, (Label(a), Label(b)) => a == b,
(Content(a), Content(b)) => a == b, (Content(a), Content(b)) => a == b,
(Array(a), Array(b)) => a == b, (Array(a), Array(b)) => a == b,

View File

@ -68,14 +68,12 @@ impl Str {
} }
/// Extract the grapheme cluster at the given index. /// Extract the grapheme cluster at the given index.
pub fn at<'a>(&'a self, index: i64, default: Option<&'a str>) -> StrResult<Self> { pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
let len = self.len(); let len = self.len();
let grapheme = self self.locate_opt(index)?
.locate_opt(index)? .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
.and_then(|i| self.0[i..].graphemes(true).next())
.or(default) .or(default)
.ok_or_else(|| no_default_and_out_of_bounds(index, len))?; .ok_or_else(|| no_default_and_out_of_bounds(index, len))
Ok(grapheme.into())
} }
/// Extract a contiguous substring. /// Extract a contiguous substring.
@ -324,8 +322,15 @@ impl Str {
Ok(Self(self.0.repeat(n))) Ok(Self(self.0.repeat(n)))
} }
/// Resolve an index, if it is within bounds. /// Resolve an index or throw an out of bounds error.
/// Errors on invalid char boundaries. fn locate(&self, index: i64) -> StrResult<usize> {
self.locate_opt(index)?
.ok_or_else(|| out_of_bounds(index, self.len()))
}
/// Resolve an index, if it is within bounds and on a valid char boundary.
///
/// `index == len` is considered in bounds.
fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> { fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
let wrapped = let wrapped =
if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) }; if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
@ -340,12 +345,6 @@ impl Str {
Ok(resolved) Ok(resolved)
} }
/// Resolve an index or throw an out of bounds error.
fn locate(&self, index: i64) -> StrResult<usize> {
self.locate_opt(index)?
.ok_or_else(|| out_of_bounds(index, self.len()))
}
} }
/// The out of bounds access error message. /// The out of bounds access error message.

View File

@ -8,14 +8,13 @@ use ecow::eco_format;
use siphasher::sip128::{Hasher128, SipHasher13}; use siphasher::sip128::{Hasher128, SipHasher13};
use super::{ use super::{
cast, fields, format_str, ops, Args, Array, CastInfo, Content, Dict, FromValue, Func, cast, fields, format_str, ops, Args, Array, Bytes, CastInfo, Content, Dict,
IntoValue, Module, Reflect, Str, Symbol, FromValue, Func, IntoValue, Module, Reflect, Str, Symbol,
}; };
use crate::diag::StrResult; use crate::diag::StrResult;
use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel}; use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel};
use crate::model::{Label, Styles}; use crate::model::{Label, Styles};
use crate::syntax::{ast, Span}; use crate::syntax::{ast, Span};
use crate::util::Bytes;
/// A computational value. /// A computational value.
#[derive(Default, Clone)] #[derive(Default, Clone)]
@ -132,7 +131,7 @@ impl Value {
pub fn field(&self, field: &str) -> StrResult<Value> { pub fn field(&self, field: &str) -> StrResult<Value> {
match self { match self {
Self::Symbol(symbol) => symbol.clone().modified(field).map(Self::Symbol), Self::Symbol(symbol) => symbol.clone().modified(field).map(Self::Symbol),
Self::Dict(dict) => dict.at(field, None).cloned(), Self::Dict(dict) => dict.at(field, None),
Self::Content(content) => content.at(field, None), Self::Content(content) => content.at(field, None),
Self::Module(module) => module.get(field).cloned(), Self::Module(module) => module.get(field).cloned(),
Self::Func(func) => func.get(field).cloned(), Self::Func(func) => func.get(field).cloned(),

View File

@ -7,8 +7,9 @@ use ttf_parser::{name_id, GlyphId, Tag};
use unicode_general_category::GeneralCategory; use unicode_general_category::GeneralCategory;
use super::{deflate, EmExt, PdfContext, RefExt}; use super::{deflate, EmExt, PdfContext, RefExt};
use crate::eval::Bytes;
use crate::font::Font; use crate::font::Font;
use crate::util::{Bytes, SliceExt}; use crate::util::SliceExt;
const CMAP_NAME: Name = Name(b"Custom"); const CMAP_NAME: Name = Name(b"Custom");
const SYSTEM_INFO: SystemInfo = SystemInfo { const SYSTEM_INFO: SystemInfo = SystemInfo {

View File

@ -4,8 +4,8 @@ use image::{DynamicImage, GenericImageView, Rgba};
use pdf_writer::{Filter, Finish}; use pdf_writer::{Filter, Finish};
use super::{deflate, PdfContext, RefExt}; use super::{deflate, PdfContext, RefExt};
use crate::eval::Bytes;
use crate::image::{DecodedImage, Image, RasterFormat}; use crate::image::{DecodedImage, Image, RasterFormat};
use crate::util::Bytes;
/// Embed all used images into the PDF. /// Embed all used images into the PDF.
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]

View File

@ -13,9 +13,8 @@ use std::sync::Arc;
use ttf_parser::GlyphId; use ttf_parser::GlyphId;
use self::book::find_name; use self::book::find_name;
use crate::eval::Cast; use crate::eval::{Bytes, Cast};
use crate::geom::Em; use crate::geom::Em;
use crate::util::Bytes;
/// An OpenType font. /// An OpenType font.
/// ///

View File

@ -896,13 +896,13 @@ fn code_completions(ctx: &mut CompletionContext, hashtag: bool) {
); );
ctx.snippet_completion( ctx.snippet_completion(
"array", "array literal",
"(${1, 2, 3})", "(${1, 2, 3})",
"Creates a sequence of values.", "Creates a sequence of values.",
); );
ctx.snippet_completion( ctx.snippet_completion(
"dictionary", "dictionary literal",
"(${a: 1, b: 2})", "(${a: 1, b: 2})",
"Creates a mapping from names to value.", "Creates a mapping from names to value.",
); );

View File

@ -16,9 +16,9 @@ use image::{ImageDecoder, ImageResult};
use usvg::{TreeParsing, TreeTextToPath}; use usvg::{TreeParsing, TreeTextToPath};
use crate::diag::{format_xml_like_error, StrResult}; use crate::diag::{format_xml_like_error, StrResult};
use crate::eval::Bytes;
use crate::font::Font; use crate::font::Font;
use crate::geom::Axes; use crate::geom::Axes;
use crate::util::Bytes;
use crate::World; use crate::World;
/// A raster or vector image. /// A raster or vector image.

View File

@ -60,10 +60,9 @@ use ecow::EcoString;
use crate::diag::{FileResult, SourceResult}; use crate::diag::{FileResult, SourceResult};
use crate::doc::Document; use crate::doc::Document;
use crate::eval::{Datetime, Library, Route, Tracer}; use crate::eval::{Bytes, Datetime, Library, Route, Tracer};
use crate::font::{Font, FontBook}; use crate::font::{Font, FontBook};
use crate::syntax::{FileId, PackageSpec, Source, Span}; use crate::syntax::{FileId, PackageSpec, Source, Span};
use crate::util::Bytes;
/// Compile a source file into a fully layouted document. /// Compile a source file into a fully layouted document.
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]

View File

@ -1,59 +0,0 @@
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::sync::Arc;
use comemo::Prehashed;
/// A shared byte buffer that is cheap to clone and hash.
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct Bytes(Arc<Prehashed<Cow<'static, [u8]>>>);
impl Bytes {
/// Create a buffer from a static byte slice.
pub fn from_static(slice: &'static [u8]) -> Self {
Self(Arc::new(Prehashed::new(Cow::Borrowed(slice))))
}
/// Return a view into the buffer.
pub fn as_slice(&self) -> &[u8] {
self
}
/// Return a copy of the buffer as a vector.
pub fn to_vec(&self) -> Vec<u8> {
self.0.to_vec()
}
}
impl From<&[u8]> for Bytes {
fn from(slice: &[u8]) -> Self {
Self(Arc::new(Prehashed::new(slice.to_vec().into())))
}
}
impl From<Vec<u8>> for Bytes {
fn from(vec: Vec<u8>) -> Self {
Self(Arc::new(Prehashed::new(vec.into())))
}
}
impl Deref for Bytes {
    type Target = [u8];

    /// Expose the buffer's contents so slice methods can be called on `Bytes`.
    fn deref(&self) -> &[u8] {
        // Deref coercion walks through the wrapper layers down to the slice.
        let contents: &[u8] = &self.0;
        contents
    }
}
impl AsRef<[u8]> for Bytes {
    /// View the buffer as a byte slice (same data as `Deref` yields).
    fn as_ref(&self) -> &[u8] {
        &**self
    }
}
impl Debug for Bytes {
    /// Format as `bytes(N)`, where `N` is the buffer length, rather than
    /// printing the contents.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let length = self.len();
        f.write_fmt(format_args!("bytes({})", length))
    }
}

View File

@ -2,10 +2,6 @@
pub mod fat; pub mod fat;
mod bytes;
pub use bytes::Bytes;
use std::fmt::{self, Debug, Formatter}; use std::fmt::{self, Debug, Formatter};
use std::hash::Hash; use std::hash::Hash;
use std::num::NonZeroUsize; use std::num::NonZeroUsize;

View File

@ -46,6 +46,8 @@ integers, integers cannot be smaller than `{-9223372036854775808}` or larger tha
The number can also be specified as hexadecimal, octal, or binary by starting it The number can also be specified as hexadecimal, octal, or binary by starting it
with a zero followed by either `x`, `o`, or `b`. with a zero followed by either `x`, `o`, or `b`.
You can convert a value to an integer with the [`int`]($func/int) function.
## Example ## Example
```example ```example
#(1 + 2) \ #(1 + 2) \
@ -64,6 +66,8 @@ A limited-precision representation of a real number. Typst uses 64 bits to
store floats. Wherever a float is expected, you can also pass an store floats. Wherever a float is expected, you can also pass an
[integer]($type/integer). [integer]($type/integer).
You can convert a value to a float with the [`float`]($func/float) function.
## Example ## Example
```example ```example
#3.14 \ #3.14 \
@ -87,6 +91,8 @@ A length has the following fields:
- `abs`: A length with just the absolute component of the current length - `abs`: A length with just the absolute component of the current length
(that is, excluding the `em` component). (that is, excluding the `em` component).
You can multiply lengths with and divide them by integers and floats.
## Example ## Example
```example ```example
#rect(width: 20pt) #rect(width: 20pt)
@ -458,6 +464,65 @@ $arrow.r$ \
$arrow.t.quad$ $arrow.t.quad$
``` ```
# Bytes
A sequence of bytes.
This is conceptually similar to an array of [integers]($type/integer) between
`{0}` and `{255}`, but represented much more efficiently.
You can convert
- a [string]($type/string) or an [array]($type/array) of integers to bytes with
the [`bytes`]($func/bytes) function
- bytes to a string with the [`str`]($func/str) function
- bytes to an array of integers with the [`array`]($func/array) function
When [reading]($func/read) data from a file, you can decide whether to load it
as a string or as raw bytes.
```example
#bytes((123, 160, 22, 0)) \
#bytes("Hello 😃")
#let data = read(
"rhino.png",
encoding: none,
)
// Magic bytes.
#array(data.slice(0, 4)) \
#str(data.slice(1, 4))
```
## Methods
### len()
The length in bytes.
- returns: integer
### at()
Returns the byte at the specified index. Returns the default value if the index
is out of bounds or fails with an error if no default value was specified.
- index: integer (positional, required)
The index at which to retrieve the byte.
- default: any (named)
A default value to return if the index is out of bounds.
- returns: integer or any
### slice()
Extract a subslice of the bytes.
Fails with an error if the start or end index is out of bounds.
- start: integer (positional, required)
The start index (inclusive).
- end: integer (positional)
The end index (exclusive). If omitted, the whole slice until the end is
extracted.
- count: integer (named)
The number of bytes to extract. This is equivalent to passing
`start + count` as the `end` position. Mutually exclusive with `end`.
- returns: bytes
# String # String
A sequence of Unicode codepoints. A sequence of Unicode codepoints.
@ -475,6 +540,8 @@ quite versatile.
All lengths and indices are expressed in terms of UTF-8 bytes. Indices are All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
zero-based and negative indices wrap around to the end of the string. zero-based and negative indices wrap around to the end of the string.
You can convert a value to a string with the [`str`]($func/str) function.
### Example ### Example
```example ```example
#"hello world!" \ #"hello world!" \
@ -521,7 +588,7 @@ value was specified.
The byte index. The byte index.
- default: any (named) - default: any (named)
A default value to return if the index is out of bounds. A default value to return if the index is out of bounds.
- returns: string - returns: string or any
### slice() ### slice()
Extract a substring of the string. Extract a substring of the string.
@ -839,8 +906,8 @@ Fails with an error if the start or index is out of bounds.
The end index (exclusive). If omitted, the whole slice until the end of the The end index (exclusive). If omitted, the whole slice until the end of the
array is extracted. array is extracted.
- count: integer (named) - count: integer (named)
The number of items to extract. This is equivalent to passing `start + The number of items to extract. This is equivalent to passing
count` as the `end` position. Mutually exclusive with `end`. `start + count` as the `end` position. Mutually exclusive with `end`.
- returns: array - returns: array
### contains() ### contains()

View File

@ -1,11 +1,10 @@
use comemo::{Prehashed, Track, Tracked}; use comemo::{Prehashed, Track, Tracked};
use iai::{black_box, main, Iai}; use iai::{black_box, main, Iai};
use typst::diag::FileResult; use typst::diag::FileResult;
use typst::eval::{Datetime, Library, Tracer}; use typst::eval::{Bytes, Datetime, Library, Tracer};
use typst::font::{Font, FontBook}; use typst::font::{Font, FontBook};
use typst::geom::Color; use typst::geom::Color;
use typst::syntax::{FileId, Source}; use typst::syntax::{FileId, Source};
use typst::util::Bytes;
use typst::World; use typst::World;
use unscanny::Scanner; use unscanny::Scanner;

View File

@ -22,11 +22,11 @@ use walkdir::WalkDir;
use typst::diag::{bail, FileError, FileResult, Severity, StrResult}; use typst::diag::{bail, FileError, FileResult, Severity, StrResult};
use typst::doc::{Document, Frame, FrameItem, Meta}; use typst::doc::{Document, Frame, FrameItem, Meta};
use typst::eval::{eco_format, func, Datetime, Library, NoneValue, Tracer, Value}; use typst::eval::{eco_format, func, Bytes, Datetime, Library, NoneValue, Tracer, Value};
use typst::font::{Font, FontBook}; use typst::font::{Font, FontBook};
use typst::geom::{Abs, Color, RgbaColor, Smart}; use typst::geom::{Abs, Color, RgbaColor, Smart};
use typst::syntax::{FileId, Source, Span, SyntaxNode}; use typst::syntax::{FileId, Source, Span, SyntaxNode};
use typst::util::{Bytes, PathExt}; use typst::util::PathExt;
use typst::World; use typst::World;
use typst_library::layout::{Margin, PageElem}; use typst_library::layout::{Margin, PageElem};
use typst_library::text::{TextElem, TextSize}; use typst_library::text::{TextElem, TextSize};

View File

@ -0,0 +1,21 @@
// Test the bytes type.
// Ref: false
---
#let data = read("/files/rhino.png", encoding: none)
#test(data.len(), 232243)
#test(data.slice(0, count: 5), bytes((137, 80, 78, 71, 13)))
#test(str(data.slice(1, 4)), "PNG")
#test(repr(data), "bytes(232243)")
---
#test(str(bytes(range(0x41, 0x50))), "ABCDEFGHIJKLMNO")
#test(array(bytes("Hello")), (0x48, 0x65, 0x6C, 0x6C, 0x6F))
---
// Error: 8-14 expected string, array, or bytes, found dictionary
#bytes((a: 1))
---
// Error: 8-15 expected bytes or array, found string
#array("hello")

View File

@ -41,8 +41,7 @@
#"Hello".at(5) #"Hello".at(5)
--- ---
// Error: 25-32 expected string, found dictionary #test("Hello".at(5, default: (a: 10)), (a: 10))
#"Hello".at(5, default: (a: 10))
--- ---
// Test the `slice` method. // Test the `slice` method.

View File

@ -18,11 +18,11 @@
#test(calc.round(calc.pi, digits: 2), 3.14) #test(calc.round(calc.pi, digits: 2), 3.14)
--- ---
// Error: 6-10 expected boolean, integer, float, or string, found length // Error: 6-10 expected boolean, float, string, or integer, found length
#int(10pt) #int(10pt)
--- ---
// Error: 8-13 expected boolean, integer, float, ratio, or string, found function // Error: 8-13 expected boolean, integer, ratio, string, or float, found function
#float(float) #float(float)
--- ---

View File

@ -103,7 +103,7 @@
#test(str(10 / 3).len() > 10, true) #test(str(10 / 3).len() > 10, true)
--- ---
// Error: 6-8 expected integer, float, label, or string, found content // Error: 6-8 expected integer, float, label, bytes, or string, found content
#str([]) #str([])
--- ---