More flexible and efficient Bytes representation (#5670)

This commit is contained in:
Laurenz 2025-01-08 11:57:56 +01:00 committed by GitHub
parent 0a374d2380
commit dacd6acd5e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 160 additions and 85 deletions

View File

@ -305,7 +305,7 @@ impl FileSlot {
) -> FileResult<Bytes> {
self.file.get_or_init(
|| read(self.id, project_root, package_storage),
|data, _| Ok(data.into()),
|data, _| Ok(Bytes::new(data)),
)
}
}

View File

@ -55,7 +55,7 @@ impl TestWorld {
pub fn with_asset_at(mut self, path: &str, filename: &str) -> Self {
let id = FileId::new(None, VirtualPath::new(path));
let data = typst_dev_assets::get_by_name(filename).unwrap();
let bytes = Bytes::from_static(data);
let bytes = Bytes::new(data);
Arc::make_mut(&mut self.files).assets.insert(id, bytes);
self
}
@ -152,7 +152,7 @@ impl Default for TestBase {
fn default() -> Self {
let fonts: Vec<_> = typst_assets::fonts()
.chain(typst_dev_assets::fonts())
.flat_map(|data| Font::iter(Bytes::from_static(data)))
.flat_map(|data| Font::iter(Bytes::new(data)))
.collect();
Self {

View File

@ -13,6 +13,7 @@ use std::path::{Path, PathBuf};
use std::sync::OnceLock;
use fontdb::{Database, Source};
use typst_library::foundations::Bytes;
use typst_library::text::{Font, FontBook, FontInfo};
use typst_timing::TimingScope;
@ -52,9 +53,8 @@ impl FontSlot {
.as_ref()
.expect("`path` is not `None` if `font` is uninitialized"),
)
.ok()?
.into();
Font::new(data, self.index)
.ok()?;
Font::new(Bytes::new(data), self.index)
})
.clone()
}
@ -196,7 +196,7 @@ impl FontSearcher {
#[cfg(feature = "embed-fonts")]
fn add_embedded(&mut self) {
for data in typst_assets::fonts() {
let buffer = typst_library::foundations::Bytes::from_static(data);
let buffer = Bytes::new(data);
for (i, font) in Font::iter(buffer).enumerate() {
self.book.push(font.info().clone());
self.fonts.push(FontSlot {

View File

@ -50,7 +50,7 @@ pub fn layout_image(
// Construct the image itself.
let image = Image::with_fonts(
data.clone().into(),
data.clone().into_bytes(),
format,
elem.alt(styles),
engine.world,

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::any::Any;
use std::fmt::{self, Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::{Add, AddAssign, Deref};
use std::sync::Arc;
@ -39,18 +40,44 @@ use crate::foundations::{cast, func, scope, ty, Array, Reflect, Repr, Str, Value
/// #str(data.slice(1, 4))
/// ```
#[ty(scope, cast)]
#[derive(Clone, Hash, Eq, PartialEq)]
pub struct Bytes(Arc<LazyHash<Cow<'static, [u8]>>>);
#[derive(Clone, Hash)]
#[allow(clippy::derived_hash_with_manual_eq)]
pub struct Bytes(Arc<LazyHash<dyn Bytelike>>);
impl Bytes {
/// Create a buffer from a static byte slice.
pub fn from_static(slice: &'static [u8]) -> Self {
Self(Arc::new(LazyHash::new(Cow::Borrowed(slice))))
/// Create `Bytes` from anything byte-like.
///
/// The `data` type will directly back this bytes object. This means you can
/// e.g. pass `&'static [u8]` or `[u8; 8]` and no extra vector will be
/// allocated.
///
/// If the type is `Vec<u8>` and the `Bytes` are unique (i.e. not cloned),
/// the vector will be reused when mutating to the `Bytes`.
///
/// If your source type is a string, prefer [`Bytes::from_string`] to
/// directly use the UTF-8 encoded string data without any copying.
pub fn new<T>(data: T) -> Self
where
T: AsRef<[u8]> + Send + Sync + 'static,
{
Self(Arc::new(LazyHash::new(data)))
}
/// Create `Bytes` from anything string-like, implicitly viewing the UTF-8
/// representation.
///
/// The `data` type will directly back this bytes object. This means you can
/// e.g. pass `String` or `EcoString` without any copying.
pub fn from_string<T>(data: T) -> Self
where
T: AsRef<str> + Send + Sync + 'static,
{
Self(Arc::new(LazyHash::new(StrWrapper(data))))
}
/// Return `true` if the length is 0.
pub fn is_empty(&self) -> bool {
self.0.is_empty()
self.as_slice().is_empty()
}
/// Return a view into the buffer.
@ -60,7 +87,7 @@ impl Bytes {
/// Return a copy of the buffer as a vector.
pub fn to_vec(&self) -> Vec<u8> {
self.0.to_vec()
self.as_slice().to_vec()
}
/// Resolve an index or throw an out of bounds error.
@ -72,12 +99,10 @@ impl Bytes {
///
/// `index == len` is considered in bounds.
fn locate_opt(&self, index: i64) -> Option<usize> {
let len = self.as_slice().len();
let wrapped =
if index >= 0 { Some(index) } else { (self.len() as i64).checked_add(index) };
wrapped
.and_then(|v| usize::try_from(v).ok())
.filter(|&v| v <= self.0.len())
if index >= 0 { Some(index) } else { (len as i64).checked_add(index) };
wrapped.and_then(|v| usize::try_from(v).ok()).filter(|&v| v <= len)
}
}
@ -106,7 +131,7 @@ impl Bytes {
/// The length in bytes.
#[func(title = "Length")]
pub fn len(&self) -> usize {
self.0.len()
self.as_slice().len()
}
/// Returns the byte at the specified index. Returns the default value if
@ -122,13 +147,13 @@ impl Bytes {
default: Option<Value>,
) -> StrResult<Value> {
self.locate_opt(index)
.and_then(|i| self.0.get(i).map(|&b| Value::Int(b.into())))
.and_then(|i| self.as_slice().get(i).map(|&b| Value::Int(b.into())))
.or(default)
.ok_or_else(|| out_of_bounds_no_default(index, self.len()))
}
/// Extracts a subslice of the bytes. Fails with an error if the start or end
/// index is out of bounds.
/// Extracts a subslice of the bytes. Fails with an error if the start or
/// end index is out of bounds.
#[func]
pub fn slice(
&self,
@ -148,9 +173,17 @@ impl Bytes {
if end.is_none() {
end = count.map(|c: i64| start + c);
}
let start = self.locate(start)?;
let end = self.locate(end.unwrap_or(self.len() as i64))?.max(start);
Ok(self.0[start..end].into())
let slice = &self.as_slice()[start..end];
// We could hold a view into the original bytes here instead of
// making a copy, but it's unclear when that's worth it. Java
// originally did that for strings, but went back on it because a
// very small view into a very large buffer would be a sort of
// memory leak.
Ok(Bytes::new(slice.to_vec()))
}
}
@ -170,7 +203,15 @@ impl Deref for Bytes {
type Target = [u8];
fn deref(&self) -> &Self::Target {
&self.0
self.0.as_bytes()
}
}
impl Eq for Bytes {}

impl PartialEq for Bytes {
    /// Two `Bytes` are equal when their shared inner values compare equal.
    ///
    /// NOTE(review): what exactly is compared (contents vs. a cached hash) is
    /// decided by the `PartialEq` impl of the inner `LazyHash`, which is not
    /// visible here.
    fn eq(&self, other: &Self) -> bool {
        PartialEq::eq(&self.0, &other.0)
    }
}
@ -180,18 +221,6 @@ impl AsRef<[u8]> for Bytes {
}
}
impl From<&[u8]> for Bytes {
fn from(slice: &[u8]) -> Self {
Self(Arc::new(LazyHash::new(slice.to_vec().into())))
}
}
impl From<Vec<u8>> for Bytes {
fn from(vec: Vec<u8>) -> Self {
Self(Arc::new(LazyHash::new(vec.into())))
}
}
impl Add for Bytes {
type Output = Self;
@ -207,10 +236,12 @@ impl AddAssign for Bytes {
// Nothing to do
} else if self.is_empty() {
*self = rhs;
} else if Arc::strong_count(&self.0) == 1 && matches!(**self.0, Cow::Owned(_)) {
Arc::make_mut(&mut self.0).to_mut().extend_from_slice(&rhs);
} else if let Some(vec) = Arc::get_mut(&mut self.0)
.and_then(|unique| unique.as_any_mut().downcast_mut::<Vec<u8>>())
{
vec.extend_from_slice(&rhs);
} else {
*self = Self::from([self.as_slice(), rhs.as_slice()].concat());
*self = Self::new([self.as_slice(), rhs.as_slice()].concat());
}
}
}
@ -228,20 +259,61 @@ impl Serialize for Bytes {
}
}
/// Any type that can back a byte buffer.
///
/// Implementors must be `Send + Sync`.
trait Bytelike: Send + Sync {
/// Returns the backing data viewed as a byte slice.
fn as_bytes(&self) -> &[u8];
/// Returns the backing value as a mutable `Any`, which lets callers try to
/// downcast it to a concrete type (e.g. `Vec<u8>`).
fn as_any_mut(&mut self) -> &mut dyn Any;
}
/// Blanket implementation: every thread-safe, `'static` value that can be
/// viewed as `[u8]` can back a byte buffer.
impl<T> Bytelike for T
where
    T: AsRef<[u8]> + Send + Sync + 'static,
{
    /// Views the value through its `AsRef<[u8]>` implementation.
    fn as_bytes(&self) -> &[u8] {
        <T as AsRef<[u8]>>::as_ref(self)
    }

    /// Exposes the concrete value as `Any` so it can be downcast.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        let erased: &mut dyn Any = self;
        erased
    }
}
/// Hashes a type-erased byte buffer by hashing its byte slice.
impl Hash for dyn Bytelike {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let bytes: &[u8] = self.as_bytes();
        Hash::hash(bytes, state);
    }
}
/// Adapter that lets string-like values back a `Bytes` by exposing their
/// UTF-8 data as a byte slice.
struct StrWrapper<T>(T);

impl<T> Bytelike for StrWrapper<T>
where
    T: AsRef<str> + Send + Sync + 'static,
{
    /// Views the wrapped string's UTF-8 encoding as bytes.
    fn as_bytes(&self) -> &[u8] {
        let text: &str = self.0.as_ref();
        text.as_bytes()
    }

    /// Exposes the wrapper (not the inner string) as `Any`.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        let erased: &mut dyn Any = self;
        erased
    }
}
/// A value that can be cast to bytes.
pub struct ToBytes(Bytes);
cast! {
ToBytes,
v: Str => Self(v.as_bytes().into()),
v: Str => Self(Bytes::from_string(v)),
v: Array => Self(v.iter()
.map(|item| match item {
Value::Int(byte @ 0..=255) => Ok(*byte as u8),
Value::Int(_) => bail!("number must be between 0 and 255"),
value => Err(<u8 as Reflect>::error(value)),
})
.collect::<Result<Vec<u8>, _>>()?
.into()
.collect::<Result<Vec<u8>, _>>()
.map(Bytes::new)?
),
v: Bytes => Self(v),
}

View File

@ -163,18 +163,14 @@ impl f64 {
size: u32,
) -> StrResult<Bytes> {
Ok(match size {
8 => match endian {
8 => Bytes::new(match endian {
Endianness::Little => self.to_le_bytes(),
Endianness::Big => self.to_be_bytes(),
}
.as_slice()
.into(),
4 => match endian {
}),
4 => Bytes::new(match endian {
Endianness::Little => (self as f32).to_le_bytes(),
Endianness::Big => (self as f32).to_be_bytes(),
}
.as_slice()
.into(),
}),
_ => bail!("size must be either 4 or 8"),
})
}

View File

@ -1,6 +1,7 @@
use std::num::{NonZeroI64, NonZeroIsize, NonZeroU64, NonZeroUsize, ParseIntError};
use ecow::{eco_format, EcoString};
use smallvec::SmallVec;
use crate::diag::{bail, StrResult};
use crate::foundations::{
@ -322,7 +323,7 @@ impl i64 {
Endianness::Little => self.to_le_bytes(),
};
let mut buf = vec![0u8; size];
let mut buf = SmallVec::<[u8; 8]>::from_elem(0, size);
match endian {
Endianness::Big => {
// Copy the bytes from the array to the buffer, starting from
@ -339,7 +340,7 @@ impl i64 {
}
}
Bytes::from(buf)
Bytes::new(buf)
}
}

View File

@ -293,7 +293,7 @@ impl Plugin {
_ => bail!("plugin did not respect the protocol"),
};
Ok(output.into())
Ok(Bytes::new(output))
}
/// An iterator over all the function names defined by the plugin.

View File

@ -459,15 +459,15 @@ impl<'de> Visitor<'de> for ValueVisitor {
}
fn visit_bytes<E: Error>(self, v: &[u8]) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
Ok(Bytes::new(v.to_vec()).into_value())
}
fn visit_borrowed_bytes<E: Error>(self, v: &'de [u8]) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
Ok(Bytes::new(v.to_vec()).into_value())
}
fn visit_byte_buf<E: Error>(self, v: Vec<u8>) -> Result<Self::Value, E> {
Ok(Bytes::from(v).into_value())
Ok(Bytes::new(v).into_value())
}
fn visit_none<E: Error>(self) -> Result<Self::Value, E> {

View File

@ -55,7 +55,7 @@ impl cbor {
let Spanned { v: value, span } = value;
let mut res = Vec::new();
ciborium::into_writer(&value, &mut res)
.map(|_| res.into())
.map(|_| Bytes::new(res))
.map_err(|err| eco_format!("failed to encode value as CBOR ({err})"))
.at(span)
}

View File

@ -56,15 +56,22 @@ pub enum Readable {
impl Readable {
pub fn as_slice(&self) -> &[u8] {
match self {
Readable::Bytes(v) => v,
Readable::Str(v) => v.as_bytes(),
Self::Bytes(v) => v,
Self::Str(v) => v.as_bytes(),
}
}
pub fn as_str(&self) -> Option<&str> {
match self {
Readable::Str(v) => Some(v.as_str()),
Readable::Bytes(v) => std::str::from_utf8(v).ok(),
Self::Str(v) => Some(v.as_str()),
Self::Bytes(v) => std::str::from_utf8(v).ok(),
}
}
/// Consumes the readable and returns its data as `Bytes`.
///
/// Byte data is passed through unchanged; string data is handed to
/// `Bytes::from_string`.
pub fn into_bytes(self) -> Bytes {
    match self {
        Self::Str(text) => Bytes::from_string(text),
        Self::Bytes(bytes) => bytes,
    }
}
}
@ -78,12 +85,3 @@ cast! {
v: Str => Self::Str(v),
v: Bytes => Self::Bytes(v),
}
impl From<Readable> for Bytes {
fn from(value: Readable) -> Self {
match value {
Readable::Bytes(v) => v,
Readable::Str(v) => v.as_bytes().into(),
}
}
}

View File

@ -7,6 +7,7 @@ use typst_syntax::Span;
use usvg::tiny_skia_path;
use xmlwriter::XmlWriter;
use crate::foundations::Bytes;
use crate::layout::{Abs, Frame, FrameItem, Point, Size};
use crate::text::{Font, Glyph};
use crate::visualize::{FixedStroke, Geometry, Image, RasterFormat, VectorFormat};
@ -101,8 +102,12 @@ fn draw_raster_glyph(
upem: Abs,
raster_image: ttf_parser::RasterGlyphImage,
) -> Option<()> {
let image =
Image::new(raster_image.data.into(), RasterFormat::Png.into(), None).ok()?;
let image = Image::new(
Bytes::new(raster_image.data.to_vec()),
RasterFormat::Png.into(),
None,
)
.ok()?;
// Apple Color emoji doesn't provide offset information (or at least
// not in a way ttf-parser understands), so we artificially shift their
@ -175,7 +180,7 @@ fn draw_colr_glyph(
let data = svg.end_document().into_bytes();
let image = Image::new(data.into(), VectorFormat::Svg.into(), None).ok()?;
let image = Image::new(Bytes::new(data), VectorFormat::Svg.into(), None).ok()?;
let y_shift = Abs::pt(upem.to_pt() - y_max);
let position = Point::new(Abs::pt(x_min), y_shift);
@ -251,7 +256,7 @@ fn draw_svg_glyph(
);
let image =
Image::new(wrapper_svg.into_bytes().into(), VectorFormat::Svg.into(), None)
Image::new(Bytes::new(wrapper_svg.into_bytes()), VectorFormat::Svg.into(), None)
.ok()?;
let position = Point::new(Abs::pt(left), Abs::pt(top) + upem);

View File

@ -274,7 +274,7 @@ mod tests {
#[track_caller]
fn test(path: &str, format: RasterFormat, dpi: f64) {
let data = typst_dev_assets::get(path).unwrap();
let bytes = Bytes::from_static(data);
let bytes = Bytes::new(data);
let image = RasterImage::new(bytes, format).unwrap();
assert_eq!(image.dpi().map(f64::round), Some(dpi));
}

View File

@ -3,6 +3,7 @@ use std::io::Read;
use base64::Engine;
use ecow::EcoString;
use ttf_parser::GlyphId;
use typst_library::foundations::Bytes;
use typst_library::layout::{Abs, Point, Ratio, Size, Transform};
use typst_library::text::{Font, TextItem};
use typst_library::visualize::{FillRule, Image, Paint, RasterFormat, RelativeTo};
@ -243,7 +244,9 @@ fn convert_bitmap_glyph_to_image(font: &Font, id: GlyphId) -> Option<(Image, f64
if raster.format != ttf_parser::RasterImageFormat::PNG {
return None;
}
let image = Image::new(raster.data.into(), RasterFormat::Png.into(), None).ok()?;
let image =
Image::new(Bytes::new(raster.data.to_vec()), RasterFormat::Png.into(), None)
.ok()?;
Some((image, raster.x as f64, raster.y as f64))
}

View File

@ -486,7 +486,7 @@ impl World for DocWorld {
fn file(&self, id: FileId) -> FileResult<Bytes> {
assert!(id.package().is_none());
Ok(Bytes::from_static(
Ok(Bytes::new(
typst_dev_assets::get_by_name(
&id.vpath().as_rootless_path().to_string_lossy(),
)

View File

@ -78,7 +78,7 @@ static LIBRARY: LazyLock<LazyHash<Library>> = LazyLock::new(|| {
static FONTS: LazyLock<(LazyHash<FontBook>, Vec<Font>)> = LazyLock::new(|| {
let fonts: Vec<_> = typst_assets::fonts()
.chain(typst_dev_assets::fonts())
.flat_map(|data| Font::iter(Bytes::from_static(data)))
.flat_map(|data| Font::iter(Bytes::new(data)))
.collect();
let book = FontBook::from_fonts(&fonts);
(LazyHash::new(book), fonts)

View File

@ -19,7 +19,7 @@ struct FuzzWorld {
impl FuzzWorld {
fn new(text: &str) -> Self {
let data = typst_assets::fonts().next().unwrap();
let font = Font::new(Bytes::from_static(data), 0).unwrap();
let font = Font::new(Bytes::new(data), 0).unwrap();
let book = FontBook::from_fonts([&font]);
Self {
library: LazyHash::new(Library::default()),

View File

@ -98,7 +98,7 @@ impl Default for TestBase {
fn default() -> Self {
let fonts: Vec<_> = typst_assets::fonts()
.chain(typst_dev_assets::fonts())
.flat_map(|data| Font::iter(Bytes::from_static(data)))
.flat_map(|data| Font::iter(Bytes::new(data)))
.collect();
Self {
@ -140,8 +140,8 @@ impl FileSlot {
self.file
.get_or_init(|| {
read(&system_path(self.id)?).map(|cow| match cow {
Cow::Owned(buf) => buf.into(),
Cow::Borrowed(buf) => Bytes::from_static(buf),
Cow::Owned(buf) => Bytes::new(buf),
Cow::Borrowed(buf) => Bytes::new(buf),
})
})
.clone()