diff --git a/crates/typst-cli/src/fonts.rs b/crates/typst-cli/src/fonts.rs index 1b4757c8b..3e89e0d6d 100644 --- a/crates/typst-cli/src/fonts.rs +++ b/crates/typst-cli/src/fonts.rs @@ -5,8 +5,8 @@ use std::path::{Path, PathBuf}; use memmap2::Mmap; use typst::diag::StrResult; +use typst::eval::Bytes; use typst::font::{Font, FontBook, FontInfo, FontVariant}; -use typst::util::Bytes; use walkdir::WalkDir; use crate::args::FontsCommand; diff --git a/crates/typst-cli/src/world.rs b/crates/typst-cli/src/world.rs index 06a728a6a..2c0ee7d06 100644 --- a/crates/typst-cli/src/world.rs +++ b/crates/typst-cli/src/world.rs @@ -9,10 +9,10 @@ use comemo::Prehashed; use same_file::Handle; use siphasher::sip128::{Hasher128, SipHasher13}; use typst::diag::{FileError, FileResult, StrResult}; -use typst::eval::{eco_format, Datetime, Library}; +use typst::eval::{eco_format, Bytes, Datetime, Library}; use typst::font::{Font, FontBook}; use typst::syntax::{FileId, Source}; -use typst::util::{Bytes, PathExt}; +use typst::util::PathExt; use typst::World; use crate::args::CompileCommand; diff --git a/crates/typst-docs/src/html.rs b/crates/typst-docs/src/html.rs index b021d4a79..38e56c093 100644 --- a/crates/typst-docs/src/html.rs +++ b/crates/typst-docs/src/html.rs @@ -4,11 +4,10 @@ use comemo::Prehashed; use pulldown_cmark as md; use typed_arena::Arena; use typst::diag::FileResult; -use typst::eval::{Datetime, Tracer}; +use typst::eval::{Bytes, Datetime, Tracer}; use typst::font::{Font, FontBook}; use typst::geom::{Point, Size}; use typst::syntax::{FileId, Source}; -use typst::util::Bytes; use typst::World; use yaml_front_matter::YamlFrontMatter; diff --git a/crates/typst-docs/src/lib.rs b/crates/typst-docs/src/lib.rs index 4dda2c0d7..6d67ffad5 100644 --- a/crates/typst-docs/src/lib.rs +++ b/crates/typst-docs/src/lib.rs @@ -983,6 +983,7 @@ const TYPE_ORDER: &[&str] = &[ "color", "datetime", "string", + "bytes", "regex", "label", "content", diff --git 
a/crates/typst-library/src/compute/construct.rs b/crates/typst-library/src/compute/construct.rs index 1ce676bb4..4329f259c 100644 --- a/crates/typst-library/src/compute/construct.rs +++ b/crates/typst-library/src/compute/construct.rs @@ -3,7 +3,7 @@ use std::str::FromStr; use time::{Month, PrimitiveDateTime}; -use typst::eval::{Datetime, Module, Regex}; +use typst::eval::{Bytes, Datetime, Module, Reflect, Regex}; use crate::prelude::*; @@ -37,9 +37,9 @@ pub struct ToInt(i64); cast! { ToInt, v: bool => Self(v as i64), - v: i64 => Self(v), v: f64 => Self(v as i64), v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?), + v: i64 => Self(v), } /// Converts a value to a float. @@ -77,9 +77,9 @@ cast! { ToFloat, v: bool => Self(v as i64 as f64), v: i64 => Self(v as f64), - v: f64 => Self(v), v: Ratio => Self(v.get()), v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?), + v: f64 => Self(v), } /// Creates a grayscale color. @@ -486,6 +486,7 @@ cast! { /// optional `base` parameter. /// - Floats are formatted in base 10 and never in exponential notation. /// - From labels the name is extracted. +/// - Bytes are decoded as UTF-8. /// /// If you wish to convert from and to Unicode code points, see /// [`str.to-unicode`]($func/str.to-unicode) and @@ -545,6 +546,11 @@ cast! { v: i64 => Self::Int(v), v: f64 => Self::Str(format_str!("{}", v)), v: Label => Self::Str(v.0.into()), + v: Bytes => Self::Str( + std::str::from_utf8(&v) + .map_err(|_| "bytes are not valid utf-8")? + .into() + ), v: Str => Self::Str(v), } @@ -633,35 +639,6 @@ cast! { }, } -/// Creates a label from a string. -/// -/// Inserting a label into content attaches it to the closest previous element -/// that is not a space. Then, the element can be [referenced]($func/ref) and -/// styled through the label. 
-/// -/// ## Example { #example } -/// ```example -/// #show : set text(blue) -/// #show label("b"): set text(red) -/// -/// = Heading -/// *Strong* #label("b") -/// ``` -/// -/// ## Syntax { #syntax } -/// This function also has dedicated syntax: You can create a label by enclosing -/// its name in angle brackets. This works both in markup and code. -/// -/// Display: Label -/// Category: construct -#[func] -pub fn label( - /// The name of the label. - name: EcoString, -) -> Label { - Label(name) -} - /// Creates a regular expression from a string. /// /// The result can be used as a @@ -701,6 +678,106 @@ pub fn regex( Regex::new(®ex.v).at(regex.span) } +/// Converts a value to bytes. +/// +/// - Strings are encoded in UTF-8. +/// - Arrays of integers between `{0}` and `{255}` are converted directly. The +/// dedicated byte representation is much more efficient than the array +/// representation and thus typically used for large byte buffers (e.g. image +/// data). +/// +/// ```example +/// #bytes("Hello 😃") \ +/// #bytes((123, 160, 22, 0)) +/// ``` +/// +/// Display: Bytes +/// Category: construct +#[func] +pub fn bytes( + /// The value that should be converted to bytes. + value: ToBytes, +) -> Bytes { + value.0 +} + +/// A value that can be cast to bytes. +pub struct ToBytes(Bytes); + +cast! { + ToBytes, + v: Str => Self(v.as_bytes().into()), + v: Array => Self(v.iter() + .map(|v| match v { + Value::Int(byte @ 0..=255) => Ok(*byte as u8), + Value::Int(_) => bail!("number must be between 0 and 255"), + value => Err(::error(value)), + }) + .collect::, _>>()? + .into() + ), + v: Bytes => Self(v), +} + +/// Creates a label from a string. +/// +/// Inserting a label into content attaches it to the closest previous element +/// that is not a space. Then, the element can be [referenced]($func/ref) and +/// styled through the label. 
+/// +/// ## Example { #example } +/// ```example +/// #show : set text(blue) +/// #show label("b"): set text(red) +/// +/// = Heading +/// *Strong* #label("b") +/// ``` +/// +/// ## Syntax { #syntax } +/// This function also has dedicated syntax: You can create a label by enclosing +/// its name in angle brackets. This works both in markup and code. +/// +/// Display: Label +/// Category: construct +#[func] +pub fn label( + /// The name of the label. + name: EcoString, +) -> Label { + Label(name) +} + +/// Converts a value to an array. +/// +/// Note that this function is only intended for conversion of a collection-like +/// value to an array, not for creation of an array from individual items. Use +/// the array syntax `(1, 2, 3)` (or `(1,)` for a single-element array) instead. +/// +/// ```example +/// #let hi = "Hello 😃" +/// #array(bytes(hi)) +/// ``` +/// +/// Display: Array +/// Category: construct +#[func] +pub fn array( + /// The value that should be converted to an array. + value: ToArray, +) -> Array { + value.0 +} + +/// A value that can be cast to an array. +pub struct ToArray(Array); + +cast! { + ToArray, + v: Bytes => Self(v.iter().map(|&b| Value::Int(b as i64)).collect()), + v: Array => Self(v), +} + /// Creates an array consisting of consecutive integers. /// /// If you pass just one positional parameter, it is interpreted as the `end` of diff --git a/crates/typst-library/src/compute/data.rs b/crates/typst-library/src/compute/data.rs index 6e3a298e1..4a7c53cc1 100644 --- a/crates/typst-library/src/compute/data.rs +++ b/crates/typst-library/src/compute/data.rs @@ -1,18 +1,24 @@ use typst::diag::{format_xml_like_error, FileError}; -use typst::eval::Datetime; +use typst::eval::{Bytes, Datetime}; use crate::prelude::*; -/// Reads plain text from a file. +/// Reads plain text or data from a file. /// -/// The file will be read and returned as a string. +/// By default, the file will be read as UTF-8 and returned as a +/// [string]($type/string). 
+/// +/// If you specify `{encoding: none}`, this returns raw [bytes]($type/bytes) +/// instead. /// /// ## Example { #example } /// ```example +/// An example for a HTML file: \ /// #let text = read("data.html") -/// -/// An example for a HTML file:\ /// #raw(text, lang: "html") +/// +/// Raw bytes: +/// #read("tiger.jpg", encoding: none) /// ``` /// /// Display: Read @@ -21,16 +27,52 @@ use crate::prelude::*; pub fn read( /// Path to a file. path: Spanned, + /// The encoding to read the file with. + /// + /// If set to `{none}`, this function returns raw bytes. + #[named] + #[default(Some(Encoding::Utf8))] + encoding: Option, /// The virtual machine. vm: &mut Vm, -) -> SourceResult { +) -> SourceResult { let Spanned { v: path, span } = path; let id = vm.location().join(&path).at(span)?; let data = vm.world().file(id).at(span)?; - let text = std::str::from_utf8(&data) - .map_err(|_| "file is not valid utf-8") - .at(span)?; - Ok(text.into()) + Ok(match encoding { + None => Readable::Bytes(data), + Some(Encoding::Utf8) => Readable::Str( + std::str::from_utf8(&data) + .map_err(|_| "file is not valid utf-8") + .at(span)? + .into(), + ), + }) +} + +/// An encoding of a file. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)] +pub enum Encoding { + /// The Unicode UTF-8 encoding. + Utf8, +} + +/// A value that can be read from a file. +pub enum Readable { + /// A decoded string. + Str(Str), + /// Raw bytes. + Bytes(Bytes), +} + +cast! { + Readable, + self => match self { + Self::Str(v) => v.into_value(), + Self::Bytes(v) => v.into_value(), + }, + v: Str => Self::Str(v), + v: Bytes => Self::Bytes(v), } /// Reads structured data from a CSV file. 
diff --git a/crates/typst-library/src/compute/mod.rs b/crates/typst-library/src/compute/mod.rs index 15309f129..757377f03 100644 --- a/crates/typst-library/src/compute/mod.rs +++ b/crates/typst-library/src/compute/mod.rs @@ -27,8 +27,10 @@ pub(super) fn define(global: &mut Scope) { global.define("datetime", datetime_func()); global.define("symbol", symbol_func()); global.define("str", str_func()); + global.define("bytes", bytes_func()); global.define("label", label_func()); global.define("regex", regex_func()); + global.define("array", array_func()); global.define("range", range_func()); global.define("read", read_func()); global.define("csv", csv_func()); diff --git a/crates/typst-library/src/meta/bibliography.rs b/crates/typst-library/src/meta/bibliography.rs index a2e7099d0..2b00ff445 100644 --- a/crates/typst-library/src/meta/bibliography.rs +++ b/crates/typst-library/src/meta/bibliography.rs @@ -8,7 +8,8 @@ use hayagriva::io::{BibLaTeXError, YamlBibliographyError}; use hayagriva::style::{self, Brackets, Citation, Database, DisplayString, Formatting}; use hayagriva::Entry; use typst::diag::FileError; -use typst::util::{option_eq, Bytes}; +use typst::eval::Bytes; +use typst::util::option_eq; use super::{LinkElem, LocalName, RefElem}; use crate::layout::{BlockElem, GridElem, ParElem, Sizing, TrackSizings, VElem}; diff --git a/crates/typst-library/src/text/raw.rs b/crates/typst-library/src/text/raw.rs index 6713f5d0b..3d3fff408 100644 --- a/crates/typst-library/src/text/raw.rs +++ b/crates/typst-library/src/text/raw.rs @@ -6,8 +6,8 @@ use once_cell::unsync::Lazy as UnsyncLazy; use syntect::highlighting as synt; use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder}; use typst::diag::FileError; +use typst::eval::Bytes; use typst::syntax::{self, LinkedNode}; -use typst::util::Bytes; use super::{ FontFamily, FontList, Hyphenate, LinebreakElem, SmartQuoteElem, TextElem, TextSize, diff --git a/crates/typst-library/src/visualize/image.rs 
b/crates/typst-library/src/visualize/image.rs index e572f2ad7..514861e5d 100644 --- a/crates/typst-library/src/visualize/image.rs +++ b/crates/typst-library/src/visualize/image.rs @@ -1,8 +1,8 @@ use std::ffi::OsStr; use std::path::Path; +use typst::eval::Bytes; use typst::image::{Image, ImageFormat, RasterFormat, VectorFormat}; -use typst::util::Bytes; use crate::meta::{Figurable, LocalName}; use crate::prelude::*; diff --git a/crates/typst/src/eval/array.rs b/crates/typst/src/eval/array.rs index 86c41ff6c..adb3e858e 100644 --- a/crates/typst/src/eval/array.rs +++ b/crates/typst/src/eval/array.rs @@ -74,13 +74,9 @@ impl Array { } /// Borrow the value at the given index. - pub fn at<'a>( - &'a self, - index: i64, - default: Option<&'a Value>, - ) -> StrResult<&'a Value> { - self.locate(index) - .and_then(|i| self.0.get(i)) + pub fn at(&self, index: i64, default: Option) -> StrResult { + self.locate_opt(index, false) + .and_then(|i| self.0.get(i).cloned()) .or(default) .ok_or_else(|| out_of_bounds_no_default(index, self.len())) } @@ -88,7 +84,7 @@ impl Array { /// Mutably borrow the value at the given index. pub fn at_mut(&mut self, index: i64) -> StrResult<&mut Value> { let len = self.len(); - self.locate(index) + self.locate_opt(index, false) .and_then(move |i| self.0.make_mut().get_mut(i)) .ok_or_else(|| out_of_bounds_no_default(index, len)) } @@ -105,42 +101,21 @@ impl Array { /// Insert a value at the specified index. pub fn insert(&mut self, index: i64, value: Value) -> StrResult<()> { - let len = self.len(); - let i = self - .locate(index) - .filter(|&i| i <= self.0.len()) - .ok_or_else(|| out_of_bounds(index, len))?; - + let i = self.locate(index, true)?; self.0.insert(i, value); Ok(()) } /// Remove and return the value at the specified index. 
pub fn remove(&mut self, index: i64) -> StrResult { - let len = self.len(); - let i = self - .locate(index) - .filter(|&i| i < self.0.len()) - .ok_or_else(|| out_of_bounds(index, len))?; - + let i = self.locate(index, false)?; Ok(self.0.remove(i)) } /// Extract a contiguous subregion of the array. pub fn slice(&self, start: i64, end: Option) -> StrResult { - let len = self.len(); - let start = self - .locate(start) - .filter(|&start| start <= self.0.len()) - .ok_or_else(|| out_of_bounds(start, len))?; - - let end = end.unwrap_or(self.len() as i64); - let end = self - .locate(end) - .filter(|&end| end <= self.0.len()) - .ok_or_else(|| out_of_bounds(end, len))? - .max(start); - + let start = self.locate(start, true)?; + let end = self.locate(end.unwrap_or(self.len() as i64), true)?.max(start); Ok(self.0[start..end].into()) } @@ -371,26 +346,6 @@ impl Array { Ok(self.iter().cloned().cycle().take(count).collect()) } - /// Extract a slice of the whole array. - pub fn as_slice(&self) -> &[Value] { - self.0.as_slice() - } - - /// Iterate over references to the contained values. - pub fn iter(&self) -> std::slice::Iter { - self.0.iter() - } - - /// Resolve an index. - fn locate(&self, index: i64) -> Option { - usize::try_from(if index >= 0 { - index - } else { - (self.len() as i64).checked_add(index)? - }) - .ok() - } - /// Enumerate all items in the array. pub fn enumerate(&self, start: i64) -> StrResult { self.iter() @@ -438,11 +393,44 @@ impl Array { Ok(Self(out)) } + + /// Extract a slice of the whole array. + pub fn as_slice(&self) -> &[Value] { + self.0.as_slice() + } + + /// Iterate over references to the contained values. + pub fn iter(&self) -> std::slice::Iter { + self.0.iter() + } + + /// Resolve an index or throw an out of bounds error. + fn locate(&self, index: i64, end_ok: bool) -> StrResult { + self.locate_opt(index, end_ok) + .ok_or_else(|| out_of_bounds(index, self.len())) + } + + /// Resolve an index, if it is within bounds. 
+ /// + /// `index == len` is considered in bounds if and only if `end_ok` is true. + fn locate_opt(&self, index: i64, end_ok: bool) -> Option { + let wrapped = + if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) }; + + wrapped + .and_then(|v| usize::try_from(v).ok()) + .filter(|&v| v < self.0.len() + end_ok as usize) + } } impl Debug for Array { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - let pieces: Vec<_> = self.iter().map(|value| eco_format!("{value:?}")).collect(); + let max = 40; + let mut pieces: Vec<_> = + self.iter().take(max).map(|value| eco_format!("{value:?}")).collect(); + if self.len() > max { + pieces.push(eco_format!(".. ({} items omitted)", self.len() - max)); + } f.write_str(&pretty_array_like(&pieces, self.len() == 1)) } } diff --git a/crates/typst/src/eval/bytes.rs b/crates/typst/src/eval/bytes.rs new file mode 100644 index 000000000..b24b289e3 --- /dev/null +++ b/crates/typst/src/eval/bytes.rs @@ -0,0 +1,111 @@ +use std::borrow::Cow; +use std::fmt::{self, Debug, Formatter}; +use std::ops::Deref; +use std::sync::Arc; + +use comemo::Prehashed; +use ecow::{eco_format, EcoString}; + +use crate::diag::StrResult; + +use super::Value; + +/// A shared byte buffer that is cheap to clone and hash. +#[derive(Clone, Hash, Eq, PartialEq)] +pub struct Bytes(Arc>>); + +impl Bytes { + /// Create a buffer from a static byte slice. + pub fn from_static(slice: &'static [u8]) -> Self { + Self(Arc::new(Prehashed::new(Cow::Borrowed(slice)))) + } + + /// Get the byte at the given index. + pub fn at(&self, index: i64, default: Option) -> StrResult { + self.locate_opt(index) + .and_then(|i| self.0.get(i).map(|&b| Value::Int(b as i64))) + .or(default) + .ok_or_else(|| out_of_bounds_no_default(index, self.len())) + } + + /// Extract a contiguous subregion of the bytes. 
+ pub fn slice(&self, start: i64, end: Option) -> StrResult { + let start = self.locate(start)?; + let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start); + Ok(self.0[start..end].into()) + } + + /// Return a view into the buffer. + pub fn as_slice(&self) -> &[u8] { + self + } + + /// Return a copy of the buffer as a vector. + pub fn to_vec(&self) -> Vec { + self.0.to_vec() + } + + /// Resolve an index or throw an out of bounds error. + fn locate(&self, index: i64) -> StrResult { + self.locate_opt(index).ok_or_else(|| out_of_bounds(index, self.len())) + } + + /// Resolve an index, if it is within bounds. + /// + /// `index == len` is considered in bounds. + fn locate_opt(&self, index: i64) -> Option { + let wrapped = + if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) }; + + wrapped + .and_then(|v| usize::try_from(v).ok()) + .filter(|&v| v <= self.0.len()) + } +} + +impl From<&[u8]> for Bytes { + fn from(slice: &[u8]) -> Self { + Self(Arc::new(Prehashed::new(slice.to_vec().into()))) + } +} + +impl From> for Bytes { + fn from(vec: Vec) -> Self { + Self(Arc::new(Prehashed::new(vec.into()))) + } +} + +impl Deref for Bytes { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl AsRef<[u8]> for Bytes { + fn as_ref(&self) -> &[u8] { + self + } +} + +impl Debug for Bytes { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "bytes({})", self.len()) + } +} + +/// The out of bounds access error message. +#[cold] +fn out_of_bounds(index: i64, len: usize) -> EcoString { + eco_format!("byte index out of bounds (index: {index}, len: {len})") +} + +/// The out of bounds access error message when no default value was given. 
+#[cold] +fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString { + eco_format!( + "byte index out of bounds (index: {index}, len: {len}) \ + and no default value was specified", + ) +} diff --git a/crates/typst/src/eval/dict.rs b/crates/typst/src/eval/dict.rs index 3e6233aec..3b007c759 100644 --- a/crates/typst/src/eval/dict.rs +++ b/crates/typst/src/eval/dict.rs @@ -49,12 +49,12 @@ impl Dict { } /// Borrow the value the given `key` maps to, - pub fn at<'a>( - &'a self, - key: &str, - default: Option<&'a Value>, - ) -> StrResult<&'a Value> { - self.0.get(key).or(default).ok_or_else(|| missing_key_no_default(key)) + pub fn at(&self, key: &str, default: Option) -> StrResult { + self.0 + .get(key) + .cloned() + .or(default) + .ok_or_else(|| missing_key_no_default(key)) } /// Mutably borrow the value the given `key` maps to. @@ -140,8 +140,10 @@ impl Debug for Dict { return f.write_str("(:)"); } - let pieces: Vec<_> = self + let max = 40; + let mut pieces: Vec<_> = self .iter() + .take(max) .map(|(key, value)| { if is_ident(key) { eco_format!("{key}: {value:?}") @@ -151,6 +153,10 @@ impl Debug for Dict { }) .collect(); + if self.len() > max { + pieces.push(eco_format!(".. ({} pairs omitted)", self.len() - max)); + } + f.write_str(&pretty_array_like(&pieces, false)) } } diff --git a/crates/typst/src/eval/methods.rs b/crates/typst/src/eval/methods.rs index 6a846f524..0247a4a73 100644 --- a/crates/typst/src/eval/methods.rs +++ b/crates/typst/src/eval/methods.rs @@ -55,11 +55,10 @@ pub fn call( "len" => string.len().into_value(), "first" => string.first().at(span)?.into_value(), "last" => string.last().at(span)?.into_value(), - "at" => { - let index = args.expect("index")?; - let default = args.named::("default")?; - string.at(index, default.as_deref()).at(span)?.into_value() - } + "at" => string + .at(args.expect("index")?, args.named("default")?) + .at(span)? 
+ .into_value(), "slice" => { let start = args.expect("start")?; let mut end = args.eat()?; @@ -93,11 +92,25 @@ pub fn call( _ => return missing(), }, + Value::Bytes(bytes) => match method { + "len" => bytes.len().into_value(), + "at" => bytes.at(args.expect("index")?, args.named("default")?).at(span)?, + "slice" => { + let start = args.expect("start")?; + let mut end = args.eat()?; + if end.is_none() { + end = args.named("count")?.map(|c: i64| start + c); + } + bytes.slice(start, end).at(span)?.into_value() + } + _ => return missing(), + }, + Value::Content(content) => match method { "func" => content.func().into_value(), "has" => content.has(&args.expect::("field")?).into_value(), "at" => content - .at(&args.expect::("field")?, args.named("default")?) + .at(&args.expect::("field")?, args.named("default")?) .at(span)?, "fields" => content.dict().into_value(), "location" => content @@ -112,10 +125,7 @@ pub fn call( "len" => array.len().into_value(), "first" => array.first().at(span)?.clone(), "last" => array.last().at(span)?.clone(), - "at" => array - .at(args.expect("index")?, args.named("default")?.as_ref()) - .at(span)? - .clone(), + "at" => array.at(args.expect("index")?, args.named("default")?).at(span)?, "slice" => { let start = args.expect("start")?; let mut end = args.eat()?; @@ -157,9 +167,8 @@ pub fn call( Value::Dict(dict) => match method { "len" => dict.len().into_value(), "at" => dict - .at(&args.expect::("key")?, args.named("default")?.as_ref()) - .at(span)? - .clone(), + .at(&args.expect::("key")?, args.named("default")?) 
+ .at(span)?, "keys" => dict.keys().into_value(), "values" => dict.values().into_value(), "pairs" => dict.pairs().into_value(), @@ -396,6 +405,7 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] { ("starts-with", true), ("trim", true), ], + "bytes" => &[("len", false), ("at", true), ("slice", true)], "content" => &[ ("func", false), ("has", true), diff --git a/crates/typst/src/eval/mod.rs b/crates/typst/src/eval/mod.rs index 15f0fdddd..770d9fd1c 100644 --- a/crates/typst/src/eval/mod.rs +++ b/crates/typst/src/eval/mod.rs @@ -14,6 +14,7 @@ mod str; mod value; mod args; mod auto; +mod bytes; mod datetime; mod fields; mod func; @@ -40,6 +41,7 @@ pub use typst_macros::{func, symbols}; pub use self::args::{Arg, Args}; pub use self::array::{array, Array}; pub use self::auto::AutoValue; +pub use self::bytes::Bytes; pub use self::cast::{ cast, Cast, CastInfo, FromValue, IntoResult, IntoValue, Never, Reflect, Variadics, }; @@ -1371,7 +1373,7 @@ where let Ok(v) = value.at(i as i64, None) else { bail!(expr.span(), "not enough elements to destructure"); }; - f(vm, expr, v.clone())?; + f(vm, expr, v)?; i += 1; } ast::DestructuringKind::Sink(spread) => { @@ -1423,7 +1425,7 @@ where .at(&ident, None) .map_err(|_| "destructuring key not found in dictionary") .at(ident.span())?; - f(vm, ast::Expr::Ident(ident.clone()), v.clone())?; + f(vm, ast::Expr::Ident(ident.clone()), v)?; used.insert(ident.take()); } ast::DestructuringKind::Sink(spread) => sink = spread.expr(), @@ -1433,7 +1435,7 @@ where .at(&name, None) .map_err(|_| "destructuring key not found in dictionary") .at(name.span())?; - f(vm, named.expr(), v.clone())?; + f(vm, named.expr(), v)?; used.insert(name.take()); } ast::DestructuringKind::Placeholder(_) => {} diff --git a/crates/typst/src/eval/ops.rs b/crates/typst/src/eval/ops.rs index 0880a87e9..323175995 100644 --- a/crates/typst/src/eval/ops.rs +++ b/crates/typst/src/eval/ops.rs @@ -347,6 +347,7 @@ pub fn equal(lhs: &Value, rhs: &Value) -> bool { 
(Color(a), Color(b)) => a == b, (Symbol(a), Symbol(b)) => a == b, (Str(a), Str(b)) => a == b, + (Bytes(a), Bytes(b)) => a == b, (Label(a), Label(b)) => a == b, (Content(a), Content(b)) => a == b, (Array(a), Array(b)) => a == b, diff --git a/crates/typst/src/eval/str.rs b/crates/typst/src/eval/str.rs index f5e5ab00a..1d88b81b5 100644 --- a/crates/typst/src/eval/str.rs +++ b/crates/typst/src/eval/str.rs @@ -68,14 +68,12 @@ impl Str { } /// Extract the grapheme cluster at the given index. - pub fn at<'a>(&'a self, index: i64, default: Option<&'a str>) -> StrResult { + pub fn at(&self, index: i64, default: Option) -> StrResult { let len = self.len(); - let grapheme = self - .locate_opt(index)? - .and_then(|i| self.0[i..].graphemes(true).next()) + self.locate_opt(index)? + .and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value())) .or(default) - .ok_or_else(|| no_default_and_out_of_bounds(index, len))?; - Ok(grapheme.into()) + .ok_or_else(|| no_default_and_out_of_bounds(index, len)) } /// Extract a contiguous substring. @@ -324,8 +322,15 @@ impl Str { Ok(Self(self.0.repeat(n))) } - /// Resolve an index, if it is within bounds. - /// Errors on invalid char boundaries. + /// Resolve an index or throw an out of bounds error. + fn locate(&self, index: i64) -> StrResult { + self.locate_opt(index)? + .ok_or_else(|| out_of_bounds(index, self.len())) + } + + /// Resolve an index, if it is within bounds and on a valid char boundary. + /// + /// `index == len` is considered in bounds. fn locate_opt(&self, index: i64) -> StrResult> { let wrapped = if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) }; @@ -340,12 +345,6 @@ impl Str { Ok(resolved) } - - /// Resolve an index or throw an out of bounds error. - fn locate(&self, index: i64) -> StrResult { - self.locate_opt(index)? - .ok_or_else(|| out_of_bounds(index, self.len())) - } } /// The out of bounds access error message. 
diff --git a/crates/typst/src/eval/value.rs b/crates/typst/src/eval/value.rs index d324c891f..1894bac02 100644 --- a/crates/typst/src/eval/value.rs +++ b/crates/typst/src/eval/value.rs @@ -8,14 +8,13 @@ use ecow::eco_format; use siphasher::sip128::{Hasher128, SipHasher13}; use super::{ - cast, fields, format_str, ops, Args, Array, CastInfo, Content, Dict, FromValue, Func, - IntoValue, Module, Reflect, Str, Symbol, + cast, fields, format_str, ops, Args, Array, Bytes, CastInfo, Content, Dict, + FromValue, Func, IntoValue, Module, Reflect, Str, Symbol, }; use crate::diag::StrResult; use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel}; use crate::model::{Label, Styles}; use crate::syntax::{ast, Span}; -use crate::util::Bytes; /// A computational value. #[derive(Default, Clone)] @@ -132,7 +131,7 @@ impl Value { pub fn field(&self, field: &str) -> StrResult { match self { Self::Symbol(symbol) => symbol.clone().modified(field).map(Self::Symbol), - Self::Dict(dict) => dict.at(field, None).cloned(), + Self::Dict(dict) => dict.at(field, None), Self::Content(content) => content.at(field, None), Self::Module(module) => module.get(field).cloned(), Self::Func(func) => func.get(field).cloned(), diff --git a/crates/typst/src/export/pdf/font.rs b/crates/typst/src/export/pdf/font.rs index f0676d8fa..47c0b027f 100644 --- a/crates/typst/src/export/pdf/font.rs +++ b/crates/typst/src/export/pdf/font.rs @@ -7,8 +7,9 @@ use ttf_parser::{name_id, GlyphId, Tag}; use unicode_general_category::GeneralCategory; use super::{deflate, EmExt, PdfContext, RefExt}; +use crate::eval::Bytes; use crate::font::Font; -use crate::util::{Bytes, SliceExt}; +use crate::util::SliceExt; const CMAP_NAME: Name = Name(b"Custom"); const SYSTEM_INFO: SystemInfo = SystemInfo { diff --git a/crates/typst/src/export/pdf/image.rs b/crates/typst/src/export/pdf/image.rs index 48472d9f9..a6dda3559 100644 --- a/crates/typst/src/export/pdf/image.rs +++ b/crates/typst/src/export/pdf/image.rs @@ -4,8 +4,8 @@ use 
image::{DynamicImage, GenericImageView, Rgba}; use pdf_writer::{Filter, Finish}; use super::{deflate, PdfContext, RefExt}; +use crate::eval::Bytes; use crate::image::{DecodedImage, Image, RasterFormat}; -use crate::util::Bytes; /// Embed all used images into the PDF. #[tracing::instrument(skip_all)] diff --git a/crates/typst/src/font/mod.rs b/crates/typst/src/font/mod.rs index 888960f1b..2dd1ad322 100644 --- a/crates/typst/src/font/mod.rs +++ b/crates/typst/src/font/mod.rs @@ -13,9 +13,8 @@ use std::sync::Arc; use ttf_parser::GlyphId; use self::book::find_name; -use crate::eval::Cast; +use crate::eval::{Bytes, Cast}; use crate::geom::Em; -use crate::util::Bytes; /// An OpenType font. /// diff --git a/crates/typst/src/ide/complete.rs b/crates/typst/src/ide/complete.rs index c633731cb..b6992e19a 100644 --- a/crates/typst/src/ide/complete.rs +++ b/crates/typst/src/ide/complete.rs @@ -896,13 +896,13 @@ fn code_completions(ctx: &mut CompletionContext, hashtag: bool) { ); ctx.snippet_completion( - "array", + "array literal", "(${1, 2, 3})", "Creates a sequence of values.", ); ctx.snippet_completion( - "dictionary", + "dictionary literal", "(${a: 1, b: 2})", "Creates a mapping from names to value.", ); diff --git a/crates/typst/src/image.rs b/crates/typst/src/image.rs index 3a245c147..1b62a5ace 100644 --- a/crates/typst/src/image.rs +++ b/crates/typst/src/image.rs @@ -16,9 +16,9 @@ use image::{ImageDecoder, ImageResult}; use usvg::{TreeParsing, TreeTextToPath}; use crate::diag::{format_xml_like_error, StrResult}; +use crate::eval::Bytes; use crate::font::Font; use crate::geom::Axes; -use crate::util::Bytes; use crate::World; /// A raster or vector image. 
diff --git a/crates/typst/src/lib.rs b/crates/typst/src/lib.rs index 3365bbaae..514aa25e4 100644 --- a/crates/typst/src/lib.rs +++ b/crates/typst/src/lib.rs @@ -60,10 +60,9 @@ use ecow::EcoString; use crate::diag::{FileResult, SourceResult}; use crate::doc::Document; -use crate::eval::{Datetime, Library, Route, Tracer}; +use crate::eval::{Bytes, Datetime, Library, Route, Tracer}; use crate::font::{Font, FontBook}; use crate::syntax::{FileId, PackageSpec, Source, Span}; -use crate::util::Bytes; /// Compile a source file into a fully layouted document. #[tracing::instrument(skip_all)] diff --git a/crates/typst/src/util/bytes.rs b/crates/typst/src/util/bytes.rs deleted file mode 100644 index 9165467b1..000000000 --- a/crates/typst/src/util/bytes.rs +++ /dev/null @@ -1,59 +0,0 @@ -use std::borrow::Cow; -use std::fmt::{self, Debug, Formatter}; -use std::ops::Deref; -use std::sync::Arc; - -use comemo::Prehashed; - -/// A shared byte buffer that is cheap to clone and hash. -#[derive(Clone, Hash, Eq, PartialEq)] -pub struct Bytes(Arc>>); - -impl Bytes { - /// Create a buffer from a static byte slice. - pub fn from_static(slice: &'static [u8]) -> Self { - Self(Arc::new(Prehashed::new(Cow::Borrowed(slice)))) - } - - /// Return a view into the buffer. - pub fn as_slice(&self) -> &[u8] { - self - } - - /// Return a copy of the buffer as a vector. 
- pub fn to_vec(&self) -> Vec { - self.0.to_vec() - } -} - -impl From<&[u8]> for Bytes { - fn from(slice: &[u8]) -> Self { - Self(Arc::new(Prehashed::new(slice.to_vec().into()))) - } -} - -impl From> for Bytes { - fn from(vec: Vec) -> Self { - Self(Arc::new(Prehashed::new(vec.into()))) - } -} - -impl Deref for Bytes { - type Target = [u8]; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl AsRef<[u8]> for Bytes { - fn as_ref(&self) -> &[u8] { - self - } -} - -impl Debug for Bytes { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "bytes({})", self.len()) - } -} diff --git a/crates/typst/src/util/mod.rs b/crates/typst/src/util/mod.rs index 39c62a93e..a6e2d5ea7 100644 --- a/crates/typst/src/util/mod.rs +++ b/crates/typst/src/util/mod.rs @@ -2,10 +2,6 @@ pub mod fat; -mod bytes; - -pub use bytes::Bytes; - use std::fmt::{self, Debug, Formatter}; use std::hash::Hash; use std::num::NonZeroUsize; diff --git a/docs/reference/types.md b/docs/reference/types.md index d7cfaa653..62c59cb00 100644 --- a/docs/reference/types.md +++ b/docs/reference/types.md @@ -46,6 +46,8 @@ integers, integers cannot be smaller than `{-9223372036854775808}` or larger tha The number can also be specified as hexadecimal, octal, or binary by starting it with a zero followed by either `x`, `o`, or `b`. +You can convert a value to an integer with the [`int`]($func/int) function. + ## Example ```example #(1 + 2) \ @@ -64,6 +66,8 @@ A limited-precision representation of a real number. Typst uses 64 bits to store floats. Wherever a float is expected, you can also pass an [integer]($type/integer). +You can convert a value to a float with the [`float`]($func/float) function. + ## Example ```example #3.14 \ @@ -87,6 +91,8 @@ A length has the following fields: - `abs`: A length with just the absolute component of the current length (that is, excluding the `em` component). +You can multiply lengths with and divide them by integers and floats. 
+ ## Example ```example #rect(width: 20pt) @@ -458,6 +464,65 @@ $arrow.r$ \ $arrow.t.quad$ ``` +# Bytes +A sequence of bytes. + +This is conceptually similar to an array of [integers]($type/integer) between +`{0}` and `{255}`, but represented much more efficiently. + +You can convert +- a [string]($type/string) or an [array]($type/array) of integers to bytes with + the [`bytes`]($func/bytes) function +- bytes to a string with the [`str`]($func/str) function +- bytes to an array of integers with the [`array`]($func/array) function + +When [reading]($func/read) data from a file, you can decide whether to load it +as a string or as raw bytes. + +```example +#bytes((123, 160, 22, 0)) \ +#bytes("Hello 😃") + +#let data = read( + "rhino.png", + encoding: none, +) + +// Magic bytes. +#array(data.slice(0, 4)) \ +#str(data.slice(1, 4)) +``` + +## Methods +### len() +The length in bytes. + +- returns: integer + +### at() +Returns the byte at the specified index. Returns the default value if the index +is out of bounds or fails with an error if no default value was specified. + +- index: integer (positional, required) + The index at which to retrieve the byte. +- default: any (named) + A default value to return if the index is out of bounds. +- returns: integer or any + +### slice() +Extract a subslice of the bytes. +Fails with an error if the start or end index is out of bounds. + +- start: integer (positional, required) + The start index (inclusive). +- end: integer (positional) + The end index (exclusive). If omitted, the whole slice until the end is + extracted. +- count: integer (named) + The number of bytes to extract. This is equivalent to passing + `start + count` as the `end` position. Mutually exclusive with `end`. +- returns: bytes + # String A sequence of Unicode codepoints. @@ -475,6 +540,8 @@ quite versatile. All lengths and indices are expressed in terms of UTF-8 characters. Indices are zero-based and negative indices wrap around to the end of the string.
+You can convert a value to a string with the [`str`]($func/str) function. + ### Example ```example #"hello world!" \ @@ -521,7 +588,7 @@ value was specified. The byte index. - default: any (named) A default value to return if the index is out of bounds. -- returns: string +- returns: string or any ### slice() Extract a substring of the string. @@ -839,8 +906,8 @@ Fails with an error if the start or index is out of bounds. The end index (exclusive). If omitted, the whole slice until the end of the array is extracted. - count: integer (named) - The number of items to extract. This is equivalent to passing `start + - count` as the `end` position. Mutually exclusive with `end`. + The number of items to extract. This is equivalent to passing + `start + count` as the `end` position. Mutually exclusive with `end`. - returns: array ### contains() diff --git a/tests/src/benches.rs b/tests/src/benches.rs index 524fda19b..a100e24e3 100644 --- a/tests/src/benches.rs +++ b/tests/src/benches.rs @@ -1,11 +1,10 @@ use comemo::{Prehashed, Track, Tracked}; use iai::{black_box, main, Iai}; use typst::diag::FileResult; -use typst::eval::{Datetime, Library, Tracer}; +use typst::eval::{Bytes, Datetime, Library, Tracer}; use typst::font::{Font, FontBook}; use typst::geom::Color; use typst::syntax::{FileId, Source}; -use typst::util::Bytes; use typst::World; use unscanny::Scanner; diff --git a/tests/src/tests.rs b/tests/src/tests.rs index f7eceeada..518233613 100644 --- a/tests/src/tests.rs +++ b/tests/src/tests.rs @@ -22,11 +22,11 @@ use walkdir::WalkDir; use typst::diag::{bail, FileError, FileResult, Severity, StrResult}; use typst::doc::{Document, Frame, FrameItem, Meta}; -use typst::eval::{eco_format, func, Datetime, Library, NoneValue, Tracer, Value}; +use typst::eval::{eco_format, func, Bytes, Datetime, Library, NoneValue, Tracer, Value}; use typst::font::{Font, FontBook}; use typst::geom::{Abs, Color, RgbaColor, Smart}; use typst::syntax::{FileId, Source, Span, SyntaxNode}; -use 
typst::util::{Bytes, PathExt}; +use typst::util::PathExt; use typst::World; use typst_library::layout::{Margin, PageElem}; use typst_library::text::{TextElem, TextSize}; diff --git a/tests/typ/compiler/bytes.typ b/tests/typ/compiler/bytes.typ new file mode 100644 index 000000000..32d0d573d --- /dev/null +++ b/tests/typ/compiler/bytes.typ @@ -0,0 +1,21 @@ +// Test the bytes type. +// Ref: false + +--- +#let data = read("/files/rhino.png", encoding: none) +#test(data.len(), 232243) +#test(data.slice(0, count: 5), bytes((137, 80, 78, 71, 13))) +#test(str(data.slice(1, 4)), "PNG") +#test(repr(data), "bytes(232243)") + +--- +#test(str(bytes(range(0x41, 0x50))), "ABCDEFGHIJKLMNO") +#test(array(bytes("Hello")), (0x48, 0x65, 0x6C, 0x6C, 0x6F)) + +--- +// Error: 8-14 expected string, array, or bytes, found dictionary +#bytes((a: 1)) + +--- +// Error: 8-15 expected bytes or array, found string +#array("hello") diff --git a/tests/typ/compiler/string.typ b/tests/typ/compiler/string.typ index c4c1669e5..4241361a2 100644 --- a/tests/typ/compiler/string.typ +++ b/tests/typ/compiler/string.typ @@ -41,8 +41,7 @@ #"Hello".at(5) --- -// Error: 25-32 expected string, found dictionary -#"Hello".at(5, default: (a: 10)) +#test("Hello".at(5, default: (a: 10)), (a: 10)) --- // Test the `slice` method. 
diff --git a/tests/typ/compute/calc.typ b/tests/typ/compute/calc.typ index cd97dfab3..bdaf28a70 100644 --- a/tests/typ/compute/calc.typ +++ b/tests/typ/compute/calc.typ @@ -18,11 +18,11 @@ #test(calc.round(calc.pi, digits: 2), 3.14) --- -// Error: 6-10 expected boolean, integer, float, or string, found length +// Error: 6-10 expected boolean, float, string, or integer, found length #int(10pt) --- -// Error: 8-13 expected boolean, integer, float, ratio, or string, found function +// Error: 8-13 expected boolean, integer, ratio, string, or float, found function #float(float) --- diff --git a/tests/typ/compute/construct.typ b/tests/typ/compute/construct.typ index ddd4c5912..ea9d28161 100644 --- a/tests/typ/compute/construct.typ +++ b/tests/typ/compute/construct.typ @@ -103,7 +103,7 @@ #test(str(10 / 3).len() > 10, true) --- -// Error: 6-8 expected integer, float, label, or string, found content +// Error: 6-8 expected integer, float, label, bytes, or string, found content #str([]) ---