Bytes type

- Moves `Bytes` from `util` to `eval` module
- Accepts bytes in `str` function for bytes -> str conversion
- Adds `bytes` function for str | array -> bytes conversion
- Adds `array` function for bytes -> array conversion
- Adds `len`, `at`, and `slice` methods for bytes
- Adds `encoding` parameter to `read` function
This commit is contained in:
Laurenz 2023-08-04 15:09:01 +02:00
parent 028d2f5308
commit b8b0137504
33 changed files with 489 additions and 229 deletions

View File

@ -5,8 +5,8 @@ use std::path::{Path, PathBuf};
use memmap2::Mmap;
use typst::diag::StrResult;
use typst::eval::Bytes;
use typst::font::{Font, FontBook, FontInfo, FontVariant};
use typst::util::Bytes;
use walkdir::WalkDir;
use crate::args::FontsCommand;

View File

@ -9,10 +9,10 @@ use comemo::Prehashed;
use same_file::Handle;
use siphasher::sip128::{Hasher128, SipHasher13};
use typst::diag::{FileError, FileResult, StrResult};
use typst::eval::{eco_format, Datetime, Library};
use typst::eval::{eco_format, Bytes, Datetime, Library};
use typst::font::{Font, FontBook};
use typst::syntax::{FileId, Source};
use typst::util::{Bytes, PathExt};
use typst::util::PathExt;
use typst::World;
use crate::args::CompileCommand;

View File

@ -4,11 +4,10 @@ use comemo::Prehashed;
use pulldown_cmark as md;
use typed_arena::Arena;
use typst::diag::FileResult;
use typst::eval::{Datetime, Tracer};
use typst::eval::{Bytes, Datetime, Tracer};
use typst::font::{Font, FontBook};
use typst::geom::{Point, Size};
use typst::syntax::{FileId, Source};
use typst::util::Bytes;
use typst::World;
use yaml_front_matter::YamlFrontMatter;

View File

@ -983,6 +983,7 @@ const TYPE_ORDER: &[&str] = &[
"color",
"datetime",
"string",
"bytes",
"regex",
"label",
"content",

View File

@ -3,7 +3,7 @@ use std::str::FromStr;
use time::{Month, PrimitiveDateTime};
use typst::eval::{Datetime, Module, Regex};
use typst::eval::{Bytes, Datetime, Module, Reflect, Regex};
use crate::prelude::*;
@ -37,9 +37,9 @@ pub struct ToInt(i64);
cast! {
ToInt,
v: bool => Self(v as i64),
v: i64 => Self(v),
v: f64 => Self(v as i64),
v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?),
v: i64 => Self(v),
}
/// Converts a value to a float.
@ -77,9 +77,9 @@ cast! {
ToFloat,
v: bool => Self(v as i64 as f64),
v: i64 => Self(v as f64),
v: f64 => Self(v),
v: Ratio => Self(v.get()),
v: EcoString => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?),
v: f64 => Self(v),
}
/// Creates a grayscale color.
@ -486,6 +486,7 @@ cast! {
/// optional `base` parameter.
/// - Floats are formatted in base 10 and never in exponential notation.
/// - From labels the name is extracted.
/// - Bytes are decoded as UTF-8.
///
/// If you wish to convert from and to Unicode code points, see
/// [`str.to-unicode`]($func/str.to-unicode) and
@ -545,6 +546,11 @@ cast! {
v: i64 => Self::Int(v),
v: f64 => Self::Str(format_str!("{}", v)),
v: Label => Self::Str(v.0.into()),
v: Bytes => Self::Str(
std::str::from_utf8(&v)
.map_err(|_| "bytes are not valid utf-8")?
.into()
),
v: Str => Self::Str(v),
}
@ -633,35 +639,6 @@ cast! {
},
}
/// Creates a label from a string.
///
/// Inserting a label into content attaches it to the closest previous element
/// that is not a space. Then, the element can be [referenced]($func/ref) and
/// styled through the label.
///
/// ## Example { #example }
/// ```example
/// #show <a>: set text(blue)
/// #show label("b"): set text(red)
///
/// = Heading <a>
/// *Strong* #label("b")
/// ```
///
/// ## Syntax { #syntax }
/// This function also has dedicated syntax: You can create a label by enclosing
/// its name in angle brackets. This works both in markup and code.
///
/// Display: Label
/// Category: construct
#[func]
pub fn label(
/// The name of the label.
name: EcoString,
) -> Label {
Label(name)
}
/// Creates a regular expression from a string.
///
/// The result can be used as a
@ -701,6 +678,106 @@ pub fn regex(
Regex::new(&regex.v).at(regex.span)
}
/// Converts a value to bytes.
///
/// - Strings are encoded in UTF-8.
/// - Arrays of integers between `{0}` and `{255}` are converted directly. The
/// dedicated byte representation is much more efficient than the array
/// representation and thus typically used for large byte buffers (e.g. image
/// data).
///
/// ```example
/// #bytes("Hello 😃") \
/// #bytes((123, 160, 22, 0))
/// ```
///
/// Display: Bytes
/// Category: construct
#[func]
pub fn bytes(
/// The value that should be converted to bytes.
value: ToBytes,
) -> Bytes {
value.0
}
/// A value that can be cast to bytes.
pub struct ToBytes(Bytes);
cast! {
ToBytes,
// Strings contribute their UTF-8 encoding.
v: Str => Self(v.as_bytes().into()),
// Arrays must consist solely of integers in the range 0..=255. An integer
// outside that range and a non-integer element produce different errors,
// and the first offending element aborts the whole conversion.
v: Array => Self(v.iter()
.map(|v| match v {
Value::Int(byte @ 0..=255) => Ok(*byte as u8),
Value::Int(_) => bail!("number must be between 0 and 255"),
value => Err(<u8 as Reflect>::error(value)),
})
.collect::<Result<Vec<u8>, _>>()?
.into()
),
// Existing bytes pass through unchanged.
v: Bytes => Self(v),
}
/// Creates a label from a string.
///
/// Inserting a label into content attaches it to the closest previous element
/// that is not a space. Then, the element can be [referenced]($func/ref) and
/// styled through the label.
///
/// ## Example { #example }
/// ```example
/// #show <a>: set text(blue)
/// #show label("b"): set text(red)
///
/// = Heading <a>
/// *Strong* #label("b")
/// ```
///
/// ## Syntax { #syntax }
/// This function also has dedicated syntax: You can create a label by enclosing
/// its name in angle brackets. This works both in markup and code.
///
/// Display: Label
/// Category: construct
#[func]
pub fn label(
/// The name of the label.
name: EcoString,
) -> Label {
Label(name)
}
/// Converts a value to an array.
///
/// - Bytes are converted to an array of integers between `{0}` and `{255}`,
///   one per byte.
/// - Arrays are returned unchanged.
///
/// Note that this function is only intended for conversion of a collection-like
/// value to an array, not for creation of an array from individual items. Use
/// the array syntax `(1, 2, 3)` (or `(1,)` for a single-element array) instead.
///
/// ```example
/// #let hi = "Hello 😃"
/// #array(bytes(hi))
/// ```
///
/// Display: Array
/// Category: construct
#[func]
pub fn array(
/// The value that should be converted to an array.
value: ToArray,
) -> Array {
value.0
}
/// A value that can be cast to an array.
pub struct ToArray(Array);
cast! {
ToArray,
// Each byte becomes one integer element of the resulting array.
v: Bytes => Self(v.iter().map(|&b| Value::Int(b as i64)).collect()),
// Existing arrays pass through unchanged.
v: Array => Self(v),
}
/// Creates an array consisting of consecutive integers.
///
/// If you pass just one positional parameter, it is interpreted as the `end` of

View File

@ -1,18 +1,24 @@
use typst::diag::{format_xml_like_error, FileError};
use typst::eval::Datetime;
use typst::eval::{Bytes, Datetime};
use crate::prelude::*;
/// Reads plain text from a file.
/// Reads plain text or data from a file.
///
/// The file will be read and returned as a string.
/// By default, the file will be read as UTF-8 and returned as a
/// [string]($type/string).
///
/// If you specify `{encoding: none}`, this returns raw [bytes]($type/bytes)
/// instead.
///
/// ## Example { #example }
/// ```example
/// An example for a HTML file: \
/// #let text = read("data.html")
///
/// An example for a HTML file:\
/// #raw(text, lang: "html")
///
/// Raw bytes:
/// #read("tiger.jpg", encoding: none)
/// ```
///
/// Display: Read
@ -21,16 +27,52 @@ use crate::prelude::*;
pub fn read(
/// Path to a file.
path: Spanned<EcoString>,
/// The encoding to read the file with.
///
/// If set to `{none}`, this function returns raw bytes.
#[named]
#[default(Some(Encoding::Utf8))]
encoding: Option<Encoding>,
/// The virtual machine.
vm: &mut Vm,
) -> SourceResult<Str> {
) -> SourceResult<Readable> {
let Spanned { v: path, span } = path;
let id = vm.location().join(&path).at(span)?;
let data = vm.world().file(id).at(span)?;
let text = std::str::from_utf8(&data)
.map_err(|_| "file is not valid utf-8")
.at(span)?;
Ok(text.into())
Ok(match encoding {
None => Readable::Bytes(data),
Some(Encoding::Utf8) => Readable::Str(
std::str::from_utf8(&data)
.map_err(|_| "file is not valid utf-8")
.at(span)?
.into(),
),
})
}
/// An encoding of a file.
// UTF-8 is the only variant so far. `read` takes this type wrapped in an
// `Option` and returns raw bytes when the option is `None`.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Cast)]
pub enum Encoding {
/// The Unicode UTF-8 encoding.
Utf8,
}
/// A value that can be read from a file.
pub enum Readable {
/// A decoded string.
Str(Str),
/// Raw bytes.
Bytes(Bytes),
}
cast! {
Readable,
// Converting into a `Value` forwards to the wrapped type's own conversion.
self => match self {
Self::Str(v) => v.into_value(),
Self::Bytes(v) => v.into_value(),
},
v: Str => Self::Str(v),
v: Bytes => Self::Bytes(v),
}
/// Reads structured data from a CSV file.

View File

@ -27,8 +27,10 @@ pub(super) fn define(global: &mut Scope) {
global.define("datetime", datetime_func());
global.define("symbol", symbol_func());
global.define("str", str_func());
global.define("bytes", bytes_func());
global.define("label", label_func());
global.define("regex", regex_func());
global.define("array", array_func());
global.define("range", range_func());
global.define("read", read_func());
global.define("csv", csv_func());

View File

@ -8,7 +8,8 @@ use hayagriva::io::{BibLaTeXError, YamlBibliographyError};
use hayagriva::style::{self, Brackets, Citation, Database, DisplayString, Formatting};
use hayagriva::Entry;
use typst::diag::FileError;
use typst::util::{option_eq, Bytes};
use typst::eval::Bytes;
use typst::util::option_eq;
use super::{LinkElem, LocalName, RefElem};
use crate::layout::{BlockElem, GridElem, ParElem, Sizing, TrackSizings, VElem};

View File

@ -6,8 +6,8 @@ use once_cell::unsync::Lazy as UnsyncLazy;
use syntect::highlighting as synt;
use syntect::parsing::{SyntaxDefinition, SyntaxSet, SyntaxSetBuilder};
use typst::diag::FileError;
use typst::eval::Bytes;
use typst::syntax::{self, LinkedNode};
use typst::util::Bytes;
use super::{
FontFamily, FontList, Hyphenate, LinebreakElem, SmartQuoteElem, TextElem, TextSize,

View File

@ -1,8 +1,8 @@
use std::ffi::OsStr;
use std::path::Path;
use typst::eval::Bytes;
use typst::image::{Image, ImageFormat, RasterFormat, VectorFormat};
use typst::util::Bytes;
use crate::meta::{Figurable, LocalName};
use crate::prelude::*;

View File

@ -74,13 +74,9 @@ impl Array {
}
/// Borrow the value at the given index.
pub fn at<'a>(
&'a self,
index: i64,
default: Option<&'a Value>,
) -> StrResult<&'a Value> {
self.locate(index)
.and_then(|i| self.0.get(i))
pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
self.locate_opt(index, false)
.and_then(|i| self.0.get(i).cloned())
.or(default)
.ok_or_else(|| out_of_bounds_no_default(index, self.len()))
}
@ -88,7 +84,7 @@ impl Array {
/// Mutably borrow the value at the given index.
pub fn at_mut(&mut self, index: i64) -> StrResult<&mut Value> {
let len = self.len();
self.locate(index)
self.locate_opt(index, false)
.and_then(move |i| self.0.make_mut().get_mut(i))
.ok_or_else(|| out_of_bounds_no_default(index, len))
}
@ -105,42 +101,21 @@ impl Array {
/// Insert a value at the specified index.
pub fn insert(&mut self, index: i64, value: Value) -> StrResult<()> {
let len = self.len();
let i = self
.locate(index)
.filter(|&i| i <= self.0.len())
.ok_or_else(|| out_of_bounds(index, len))?;
let i = self.locate(index, true)?;
self.0.insert(i, value);
Ok(())
}
/// Remove and return the value at the specified index.
pub fn remove(&mut self, index: i64) -> StrResult<Value> {
let len = self.len();
let i = self
.locate(index)
.filter(|&i| i < self.0.len())
.ok_or_else(|| out_of_bounds(index, len))?;
let i = self.locate(index, false)?;
Ok(self.0.remove(i))
}
/// Extract a contiguous subregion of the array.
pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> {
let len = self.len();
let start = self
.locate(start)
.filter(|&start| start <= self.0.len())
.ok_or_else(|| out_of_bounds(start, len))?;
let end = end.unwrap_or(self.len() as i64);
let end = self
.locate(end)
.filter(|&end| end <= self.0.len())
.ok_or_else(|| out_of_bounds(end, len))?
.max(start);
let start = self.locate(start, true)?;
let end = self.locate(end.unwrap_or(self.len() as i64), true)?.max(start);
Ok(self.0[start..end].into())
}
@ -371,26 +346,6 @@ impl Array {
Ok(self.iter().cloned().cycle().take(count).collect())
}
/// Extract a slice of the whole array.
pub fn as_slice(&self) -> &[Value] {
self.0.as_slice()
}
/// Iterate over references to the contained values.
pub fn iter(&self) -> std::slice::Iter<Value> {
self.0.iter()
}
/// Resolve an index.
fn locate(&self, index: i64) -> Option<usize> {
usize::try_from(if index >= 0 {
index
} else {
(self.len() as i64).checked_add(index)?
})
.ok()
}
/// Enumerate all items in the array.
pub fn enumerate(&self, start: i64) -> StrResult<Self> {
self.iter()
@ -438,11 +393,44 @@ impl Array {
Ok(Self(out))
}
/// Extract a slice of the whole array.
///
/// The slice borrows from the array; no values are cloned here.
pub fn as_slice(&self) -> &[Value] {
self.0.as_slice()
}
/// Iterate over references to the contained values.
///
/// The returned iterator borrows from the array.
pub fn iter(&self) -> std::slice::Iter<Value> {
self.0.iter()
}
/// Resolve an index into a position in the array, or fail with the out of
/// bounds error message if it cannot be resolved.
///
/// Both `index` and `end_ok` are forwarded to `locate_opt` unchanged.
fn locate(&self, index: i64, end_ok: bool) -> StrResult<usize> {
    match self.locate_opt(index, end_ok) {
        Some(position) => Ok(position),
        None => Err(out_of_bounds(index, self.len())),
    }
}
/// Resolve an index, if it is within bounds.
///
/// Negative indices count from the end of the array. `index == len` is
/// considered in bounds if and only if `end_ok` is true.
fn locate_opt(&self, index: i64, end_ok: bool) -> Option<usize> {
    // Translate a negative index into an offset from the start; give up if
    // the addition overflows.
    let absolute = if index < 0 {
        (self.len() as i64).checked_add(index)?
    } else {
        index
    };

    // An index that is still negative after wrapping cannot be a position.
    let position = usize::try_from(absolute).ok()?;

    // The exclusive upper limit is one larger when the end itself is allowed.
    let limit = self.0.len() + end_ok as usize;
    if position < limit {
        Some(position)
    } else {
        None
    }
}
}
impl Debug for Array {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
let pieces: Vec<_> = self.iter().map(|value| eco_format!("{value:?}")).collect();
let max = 40;
let mut pieces: Vec<_> =
self.iter().take(max).map(|value| eco_format!("{value:?}")).collect();
if self.len() > max {
pieces.push(eco_format!(".. ({} items omitted)", self.len() - max));
}
f.write_str(&pretty_array_like(&pieces, self.len() == 1))
}
}

View File

@ -0,0 +1,111 @@
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::sync::Arc;
use comemo::Prehashed;
use ecow::{eco_format, EcoString};
use crate::diag::StrResult;
use super::Value;
/// A shared byte buffer that is cheap to clone and hash.
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct Bytes(Arc<Prehashed<Cow<'static, [u8]>>>);

impl Bytes {
    /// Create a buffer from a static byte slice.
    ///
    /// The slice is stored as a borrow rather than being copied.
    pub fn from_static(slice: &'static [u8]) -> Self {
        let borrowed: Cow<'static, [u8]> = Cow::Borrowed(slice);
        Self(Arc::new(Prehashed::new(borrowed)))
    }

    /// Get the byte at the given index as an integer value.
    ///
    /// Negative indices count from the end. If no byte exists at the index,
    /// `default` is returned instead; without a default this is an error.
    pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
        let byte = self
            .locate_opt(index)
            .and_then(|i| self.as_slice().get(i))
            .map(|&b| Value::Int(i64::from(b)));

        match byte.or(default) {
            Some(value) => Ok(value),
            None => Err(out_of_bounds_no_default(index, self.len())),
        }
    }

    /// Extract a contiguous subregion of the bytes.
    ///
    /// A missing `end` means "until the end of the buffer". An end that lies
    /// before the start yields an empty result rather than an error.
    pub fn slice(&self, start: i64, end: Option<i64>) -> StrResult<Self> {
        let from = self.locate(start)?;
        let requested_end = match end {
            Some(end) => end,
            None => self.len() as i64,
        };
        let until = self.locate(requested_end)?.max(from);
        Ok(Self::from(&self.as_slice()[from..until]))
    }

    /// Return a view into the buffer.
    pub fn as_slice(&self) -> &[u8] {
        self
    }

    /// Return a copy of the buffer as a vector.
    pub fn to_vec(&self) -> Vec<u8> {
        self.as_slice().to_vec()
    }

    /// Resolve an index or throw an out of bounds error.
    fn locate(&self, index: i64) -> StrResult<usize> {
        match self.locate_opt(index) {
            Some(position) => Ok(position),
            None => Err(out_of_bounds(index, self.len())),
        }
    }

    /// Resolve an index, if it is within bounds.
    ///
    /// Negative indices count from the end of the buffer.
    /// `index == len` is considered in bounds.
    fn locate_opt(&self, index: i64) -> Option<usize> {
        let len = self.len();

        // Translate a negative index into an offset from the start; give up
        // if the addition overflows.
        let absolute = if index < 0 {
            (len as i64).checked_add(index)?
        } else {
            index
        };

        // An index that is still negative after wrapping is out of bounds.
        let position = usize::try_from(absolute).ok()?;
        if position <= len {
            Some(position)
        } else {
            None
        }
    }
}
impl From<&[u8]> for Bytes {
fn from(slice: &[u8]) -> Self {
Self(Arc::new(Prehashed::new(slice.to_vec().into())))
}
}
impl From<Vec<u8>> for Bytes {
fn from(vec: Vec<u8>) -> Self {
Self(Arc::new(Prehashed::new(vec.into())))
}
}
// Lets a `Bytes` be used wherever a byte slice is expected, which also makes
// slice methods such as `len` available on it directly.
impl Deref for Bytes {
type Target = [u8];
fn deref(&self) -> &Self::Target {
// The reference to the inner field is coerced to the target slice type.
&self.0
}
}
impl AsRef<[u8]> for Bytes {
    /// Borrow the buffer's contents as a byte slice.
    fn as_ref(&self) -> &[u8] {
        self.as_slice()
    }
}
impl Debug for Bytes {
// Prints only the length of the buffer (e.g. `bytes(232243)`), never its
// contents.
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "bytes({})", self.len())
}
}
/// The out of bounds access error message.
///
/// Reports both the offending `index` as given by the caller (before any
/// wrapping of negative values) and the buffer length `len`.
// Marked `#[cold]` since it is only called on the error path.
#[cold]
fn out_of_bounds(index: i64, len: usize) -> EcoString {
eco_format!("byte index out of bounds (index: {index}, len: {len})")
}
/// The out of bounds access error message when no default value was given.
///
/// Used by `at`, where a caller-supplied default could have prevented the
/// error.
// Marked `#[cold]` since it is only called on the error path.
#[cold]
fn out_of_bounds_no_default(index: i64, len: usize) -> EcoString {
eco_format!(
"byte index out of bounds (index: {index}, len: {len}) \
and no default value was specified",
)
}

View File

@ -49,12 +49,12 @@ impl Dict {
}
/// Borrow the value the given `key` maps to,
pub fn at<'a>(
&'a self,
key: &str,
default: Option<&'a Value>,
) -> StrResult<&'a Value> {
self.0.get(key).or(default).ok_or_else(|| missing_key_no_default(key))
pub fn at(&self, key: &str, default: Option<Value>) -> StrResult<Value> {
self.0
.get(key)
.cloned()
.or(default)
.ok_or_else(|| missing_key_no_default(key))
}
/// Mutably borrow the value the given `key` maps to.
@ -140,8 +140,10 @@ impl Debug for Dict {
return f.write_str("(:)");
}
let pieces: Vec<_> = self
let max = 40;
let mut pieces: Vec<_> = self
.iter()
.take(max)
.map(|(key, value)| {
if is_ident(key) {
eco_format!("{key}: {value:?}")
@ -151,6 +153,10 @@ impl Debug for Dict {
})
.collect();
if self.len() > max {
pieces.push(eco_format!(".. ({} pairs omitted)", self.len() - max));
}
f.write_str(&pretty_array_like(&pieces, false))
}
}

View File

@ -55,11 +55,10 @@ pub fn call(
"len" => string.len().into_value(),
"first" => string.first().at(span)?.into_value(),
"last" => string.last().at(span)?.into_value(),
"at" => {
let index = args.expect("index")?;
let default = args.named::<EcoString>("default")?;
string.at(index, default.as_deref()).at(span)?.into_value()
}
"at" => string
.at(args.expect("index")?, args.named("default")?)
.at(span)?
.into_value(),
"slice" => {
let start = args.expect("start")?;
let mut end = args.eat()?;
@ -93,11 +92,25 @@ pub fn call(
_ => return missing(),
},
Value::Bytes(bytes) => match method {
"len" => bytes.len().into_value(),
"at" => bytes.at(args.expect("index")?, args.named("default")?).at(span)?,
"slice" => {
let start = args.expect("start")?;
let mut end = args.eat()?;
if end.is_none() {
end = args.named("count")?.map(|c: i64| start + c);
}
bytes.slice(start, end).at(span)?.into_value()
}
_ => return missing(),
},
Value::Content(content) => match method {
"func" => content.func().into_value(),
"has" => content.has(&args.expect::<EcoString>("field")?).into_value(),
"at" => content
.at(&args.expect::<EcoString>("field")?, args.named("default")?)
.at(&args.expect::<Str>("field")?, args.named("default")?)
.at(span)?,
"fields" => content.dict().into_value(),
"location" => content
@ -112,10 +125,7 @@ pub fn call(
"len" => array.len().into_value(),
"first" => array.first().at(span)?.clone(),
"last" => array.last().at(span)?.clone(),
"at" => array
.at(args.expect("index")?, args.named("default")?.as_ref())
.at(span)?
.clone(),
"at" => array.at(args.expect("index")?, args.named("default")?).at(span)?,
"slice" => {
let start = args.expect("start")?;
let mut end = args.eat()?;
@ -157,9 +167,8 @@ pub fn call(
Value::Dict(dict) => match method {
"len" => dict.len().into_value(),
"at" => dict
.at(&args.expect::<Str>("key")?, args.named("default")?.as_ref())
.at(span)?
.clone(),
.at(&args.expect::<Str>("key")?, args.named("default")?)
.at(span)?,
"keys" => dict.keys().into_value(),
"values" => dict.values().into_value(),
"pairs" => dict.pairs().into_value(),
@ -396,6 +405,7 @@ pub fn methods_on(type_name: &str) -> &[(&'static str, bool)] {
("starts-with", true),
("trim", true),
],
"bytes" => &[("len", false), ("at", true), ("slice", true)],
"content" => &[
("func", false),
("has", true),

View File

@ -14,6 +14,7 @@ mod str;
mod value;
mod args;
mod auto;
mod bytes;
mod datetime;
mod fields;
mod func;
@ -40,6 +41,7 @@ pub use typst_macros::{func, symbols};
pub use self::args::{Arg, Args};
pub use self::array::{array, Array};
pub use self::auto::AutoValue;
pub use self::bytes::Bytes;
pub use self::cast::{
cast, Cast, CastInfo, FromValue, IntoResult, IntoValue, Never, Reflect, Variadics,
};
@ -1371,7 +1373,7 @@ where
let Ok(v) = value.at(i as i64, None) else {
bail!(expr.span(), "not enough elements to destructure");
};
f(vm, expr, v.clone())?;
f(vm, expr, v)?;
i += 1;
}
ast::DestructuringKind::Sink(spread) => {
@ -1423,7 +1425,7 @@ where
.at(&ident, None)
.map_err(|_| "destructuring key not found in dictionary")
.at(ident.span())?;
f(vm, ast::Expr::Ident(ident.clone()), v.clone())?;
f(vm, ast::Expr::Ident(ident.clone()), v)?;
used.insert(ident.take());
}
ast::DestructuringKind::Sink(spread) => sink = spread.expr(),
@ -1433,7 +1435,7 @@ where
.at(&name, None)
.map_err(|_| "destructuring key not found in dictionary")
.at(name.span())?;
f(vm, named.expr(), v.clone())?;
f(vm, named.expr(), v)?;
used.insert(name.take());
}
ast::DestructuringKind::Placeholder(_) => {}

View File

@ -347,6 +347,7 @@ pub fn equal(lhs: &Value, rhs: &Value) -> bool {
(Color(a), Color(b)) => a == b,
(Symbol(a), Symbol(b)) => a == b,
(Str(a), Str(b)) => a == b,
(Bytes(a), Bytes(b)) => a == b,
(Label(a), Label(b)) => a == b,
(Content(a), Content(b)) => a == b,
(Array(a), Array(b)) => a == b,

View File

@ -68,14 +68,12 @@ impl Str {
}
/// Extract the grapheme cluster at the given index.
pub fn at<'a>(&'a self, index: i64, default: Option<&'a str>) -> StrResult<Self> {
pub fn at(&self, index: i64, default: Option<Value>) -> StrResult<Value> {
let len = self.len();
let grapheme = self
.locate_opt(index)?
.and_then(|i| self.0[i..].graphemes(true).next())
self.locate_opt(index)?
.and_then(|i| self.0[i..].graphemes(true).next().map(|s| s.into_value()))
.or(default)
.ok_or_else(|| no_default_and_out_of_bounds(index, len))?;
Ok(grapheme.into())
.ok_or_else(|| no_default_and_out_of_bounds(index, len))
}
/// Extract a contiguous substring.
@ -324,8 +322,15 @@ impl Str {
Ok(Self(self.0.repeat(n)))
}
/// Resolve an index, if it is within bounds.
/// Errors on invalid char boundaries.
/// Resolve an index into a position in the string, or fail with the out of
/// bounds error message if it cannot be resolved.
///
/// Errors produced by `locate_opt` itself are passed on unchanged.
fn locate(&self, index: i64) -> StrResult<usize> {
    match self.locate_opt(index)? {
        Some(position) => Ok(position),
        None => Err(out_of_bounds(index, self.len())),
    }
}
/// Resolve an index, if it is within bounds and on a valid char boundary.
///
/// `index == len` is considered in bounds.
fn locate_opt(&self, index: i64) -> StrResult<Option<usize>> {
let wrapped =
if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
@ -340,12 +345,6 @@ impl Str {
Ok(resolved)
}
/// Resolve an index or throw an out of bounds error.
fn locate(&self, index: i64) -> StrResult<usize> {
self.locate_opt(index)?
.ok_or_else(|| out_of_bounds(index, self.len()))
}
}
/// The out of bounds access error message.

View File

@ -8,14 +8,13 @@ use ecow::eco_format;
use siphasher::sip128::{Hasher128, SipHasher13};
use super::{
cast, fields, format_str, ops, Args, Array, CastInfo, Content, Dict, FromValue, Func,
IntoValue, Module, Reflect, Str, Symbol,
cast, fields, format_str, ops, Args, Array, Bytes, CastInfo, Content, Dict,
FromValue, Func, IntoValue, Module, Reflect, Str, Symbol,
};
use crate::diag::StrResult;
use crate::geom::{Abs, Angle, Color, Em, Fr, Length, Ratio, Rel};
use crate::model::{Label, Styles};
use crate::syntax::{ast, Span};
use crate::util::Bytes;
/// A computational value.
#[derive(Default, Clone)]
@ -132,7 +131,7 @@ impl Value {
pub fn field(&self, field: &str) -> StrResult<Value> {
match self {
Self::Symbol(symbol) => symbol.clone().modified(field).map(Self::Symbol),
Self::Dict(dict) => dict.at(field, None).cloned(),
Self::Dict(dict) => dict.at(field, None),
Self::Content(content) => content.at(field, None),
Self::Module(module) => module.get(field).cloned(),
Self::Func(func) => func.get(field).cloned(),

View File

@ -7,8 +7,9 @@ use ttf_parser::{name_id, GlyphId, Tag};
use unicode_general_category::GeneralCategory;
use super::{deflate, EmExt, PdfContext, RefExt};
use crate::eval::Bytes;
use crate::font::Font;
use crate::util::{Bytes, SliceExt};
use crate::util::SliceExt;
const CMAP_NAME: Name = Name(b"Custom");
const SYSTEM_INFO: SystemInfo = SystemInfo {

View File

@ -4,8 +4,8 @@ use image::{DynamicImage, GenericImageView, Rgba};
use pdf_writer::{Filter, Finish};
use super::{deflate, PdfContext, RefExt};
use crate::eval::Bytes;
use crate::image::{DecodedImage, Image, RasterFormat};
use crate::util::Bytes;
/// Embed all used images into the PDF.
#[tracing::instrument(skip_all)]

View File

@ -13,9 +13,8 @@ use std::sync::Arc;
use ttf_parser::GlyphId;
use self::book::find_name;
use crate::eval::Cast;
use crate::eval::{Bytes, Cast};
use crate::geom::Em;
use crate::util::Bytes;
/// An OpenType font.
///

View File

@ -896,13 +896,13 @@ fn code_completions(ctx: &mut CompletionContext, hashtag: bool) {
);
ctx.snippet_completion(
"array",
"array literal",
"(${1, 2, 3})",
"Creates a sequence of values.",
);
ctx.snippet_completion(
"dictionary",
"dictionary literal",
"(${a: 1, b: 2})",
"Creates a mapping from names to value.",
);

View File

@ -16,9 +16,9 @@ use image::{ImageDecoder, ImageResult};
use usvg::{TreeParsing, TreeTextToPath};
use crate::diag::{format_xml_like_error, StrResult};
use crate::eval::Bytes;
use crate::font::Font;
use crate::geom::Axes;
use crate::util::Bytes;
use crate::World;
/// A raster or vector image.

View File

@ -60,10 +60,9 @@ use ecow::EcoString;
use crate::diag::{FileResult, SourceResult};
use crate::doc::Document;
use crate::eval::{Datetime, Library, Route, Tracer};
use crate::eval::{Bytes, Datetime, Library, Route, Tracer};
use crate::font::{Font, FontBook};
use crate::syntax::{FileId, PackageSpec, Source, Span};
use crate::util::Bytes;
/// Compile a source file into a fully layouted document.
#[tracing::instrument(skip_all)]

View File

@ -1,59 +0,0 @@
use std::borrow::Cow;
use std::fmt::{self, Debug, Formatter};
use std::ops::Deref;
use std::sync::Arc;
use comemo::Prehashed;
/// A shared byte buffer that is cheap to clone and hash.
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct Bytes(Arc<Prehashed<Cow<'static, [u8]>>>);
impl Bytes {
/// Create a buffer from a static byte slice.
pub fn from_static(slice: &'static [u8]) -> Self {
Self(Arc::new(Prehashed::new(Cow::Borrowed(slice))))
}
/// Return a view into the buffer.
pub fn as_slice(&self) -> &[u8] {
self
}
/// Return a copy of the buffer as a vector.
pub fn to_vec(&self) -> Vec<u8> {
self.0.to_vec()
}
}
impl From<&[u8]> for Bytes {
fn from(slice: &[u8]) -> Self {
Self(Arc::new(Prehashed::new(slice.to_vec().into())))
}
}
impl From<Vec<u8>> for Bytes {
fn from(vec: Vec<u8>) -> Self {
Self(Arc::new(Prehashed::new(vec.into())))
}
}
impl Deref for Bytes {
type Target = [u8];
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl AsRef<[u8]> for Bytes {
fn as_ref(&self) -> &[u8] {
self
}
}
impl Debug for Bytes {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "bytes({})", self.len())
}
}

View File

@ -2,10 +2,6 @@
pub mod fat;
mod bytes;
pub use bytes::Bytes;
use std::fmt::{self, Debug, Formatter};
use std::hash::Hash;
use std::num::NonZeroUsize;

View File

@ -46,6 +46,8 @@ integers, integers cannot be smaller than `{-9223372036854775808}` or larger tha
The number can also be specified as hexadecimal, octal, or binary by starting it
with a zero followed by either `x`, `o`, or `b`.
You can convert a value to an integer with the [`int`]($func/int) function.
## Example
```example
#(1 + 2) \
@ -64,6 +66,8 @@ A limited-precision representation of a real number. Typst uses 64 bits to
store floats. Wherever a float is expected, you can also pass an
[integer]($type/integer).
You can convert a value to a float with the [`float`]($func/float) function.
## Example
```example
#3.14 \
@ -87,6 +91,8 @@ A length has the following fields:
- `abs`: A length with just the absolute component of the current length
(that is, excluding the `em` component).
You can multiply lengths with and divide them by integers and floats.
## Example
```example
#rect(width: 20pt)
@ -458,6 +464,65 @@ $arrow.r$ \
$arrow.t.quad$
```
# Bytes
A sequence of bytes.
This is conceptually similar to an array of [integers]($type/integer) between
`{0}` and `{255}`, but represented much more efficiently.
You can convert
- a [string]($type/string) or an [array]($type/array) of integers to bytes with
the [`bytes`]($func/bytes) function
- bytes to a string with the [`str`]($func/str) function
- bytes to an array of integers with the [`array`]($func/array) function
When [reading]($func/read) data from a file, you can decide whether to load it
as a string or as raw bytes.
```example
#bytes((123, 160, 22, 0)) \
#bytes("Hello 😃")
#let data = read(
"rhino.png",
encoding: none,
)
// Magic bytes.
#array(data.slice(0, 4)) \
#str(data.slice(1, 4))
```
## Methods
### len()
The length in bytes.
- returns: integer
### at()
Returns the byte at the specified index. Returns the default value if the index
is out of bounds or fails with an error if no default value was specified.
- index: integer (positional, required)
The index at which to retrieve the byte.
- default: any (named)
A default value to return if the index is out of bounds.
- returns: integer or any
### slice()
Extract a subslice of the bytes.
Fails with an error if the start or end index is out of bounds.
- start: integer (positional, required)
The start index (inclusive).
- end: integer (positional)
The end index (exclusive). If omitted, the whole slice until the end is
extracted.
- count: integer (named)
The number of bytes to extract. This is equivalent to passing
`start + count` as the `end` position. Mutually exclusive with `end`.
- returns: bytes
# String
A sequence of Unicode codepoints.
@ -475,6 +540,8 @@ quite versatile.
All lengths and indices are expressed in terms of UTF-8 bytes. Indices are
zero-based and negative indices wrap around to the end of the string.
You can convert a value to a string with the [`str`]($func/str) function.
### Example
```example
#"hello world!" \
@ -521,7 +588,7 @@ value was specified.
The byte index.
- default: any (named)
A default value to return if the index is out of bounds.
- returns: string
- returns: string or any
### slice()
Extract a substring of the string.
@ -839,8 +906,8 @@ Fails with an error if the start or index is out of bounds.
The end index (exclusive). If omitted, the whole slice until the end of the
array is extracted.
- count: integer (named)
The number of items to extract. This is equivalent to passing `start +
count` as the `end` position. Mutually exclusive with `end`.
The number of items to extract. This is equivalent to passing
`start + count` as the `end` position. Mutually exclusive with `end`.
- returns: array
### contains()

View File

@ -1,11 +1,10 @@
use comemo::{Prehashed, Track, Tracked};
use iai::{black_box, main, Iai};
use typst::diag::FileResult;
use typst::eval::{Datetime, Library, Tracer};
use typst::eval::{Bytes, Datetime, Library, Tracer};
use typst::font::{Font, FontBook};
use typst::geom::Color;
use typst::syntax::{FileId, Source};
use typst::util::Bytes;
use typst::World;
use unscanny::Scanner;

View File

@ -22,11 +22,11 @@ use walkdir::WalkDir;
use typst::diag::{bail, FileError, FileResult, Severity, StrResult};
use typst::doc::{Document, Frame, FrameItem, Meta};
use typst::eval::{eco_format, func, Datetime, Library, NoneValue, Tracer, Value};
use typst::eval::{eco_format, func, Bytes, Datetime, Library, NoneValue, Tracer, Value};
use typst::font::{Font, FontBook};
use typst::geom::{Abs, Color, RgbaColor, Smart};
use typst::syntax::{FileId, Source, Span, SyntaxNode};
use typst::util::{Bytes, PathExt};
use typst::util::PathExt;
use typst::World;
use typst_library::layout::{Margin, PageElem};
use typst_library::text::{TextElem, TextSize};

View File

@ -0,0 +1,21 @@
// Test the bytes type.
// Ref: false
---
#let data = read("/files/rhino.png", encoding: none)
#test(data.len(), 232243)
#test(data.slice(0, count: 5), bytes((137, 80, 78, 71, 13)))
#test(str(data.slice(1, 4)), "PNG")
#test(repr(data), "bytes(232243)")
---
#test(str(bytes(range(0x41, 0x50))), "ABCDEFGHIJKLMNO")
#test(array(bytes("Hello")), (0x48, 0x65, 0x6C, 0x6C, 0x6F))
---
// Error: 8-14 expected string, array, or bytes, found dictionary
#bytes((a: 1))
---
// Error: 8-15 expected bytes or array, found string
#array("hello")

View File

@ -41,8 +41,7 @@
#"Hello".at(5)
---
// Error: 25-32 expected string, found dictionary
#"Hello".at(5, default: (a: 10))
#test("Hello".at(5, default: (a: 10)), (a: 10))
---
// Test the `slice` method.

View File

@ -18,11 +18,11 @@
#test(calc.round(calc.pi, digits: 2), 3.14)
---
// Error: 6-10 expected boolean, integer, float, or string, found length
// Error: 6-10 expected boolean, float, string, or integer, found length
#int(10pt)
---
// Error: 8-13 expected boolean, integer, float, ratio, or string, found function
// Error: 8-13 expected boolean, integer, ratio, string, or float, found function
#float(float)
---

View File

@ -103,7 +103,7 @@
#test(str(10 / 3).len() > 10, true)
---
// Error: 6-8 expected integer, float, label, or string, found content
// Error: 6-8 expected integer, float, label, bytes, or string, found content
#str([])
---