From 9fa41da5e8e194d17b3762d4a3b629dd49660105 Mon Sep 17 00:00:00 2001 From: Laurenz Stampfl Date: Thu, 17 Jul 2025 20:42:35 +0200 Subject: [PATCH] First working version for PDF export! --- Cargo.lock | 61 +++++++++++-- Cargo.toml | 5 +- crates/typst-cli/src/args.rs | 2 +- crates/typst-cli/src/world.rs | 2 +- crates/typst-library/Cargo.toml | 1 + crates/typst-library/src/lib.rs | 2 +- .../typst-library/src/visualize/image/mod.rs | 28 +++++- .../typst-library/src/visualize/image/pdf.rs | 87 ++++++++++++++++++- crates/typst-pdf/src/convert.rs | 16 ++++ crates/typst-pdf/src/image.rs | 10 +++ crates/typst-render/src/image.rs | 1 + crates/typst-svg/src/image.rs | 1 + 12 files changed, 196 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dcd154367..af08e97b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -964,6 +964,29 @@ dependencies = [ "url", ] +[[package]] +name = "hayro-syntax" +version = "0.0.1" +dependencies = [ + "flate2", + "kurbo", + "log", + "rustc-hash", + "smallvec", + "zune-jpeg 0.5.0-rc5", +] + +[[package]] +name = "hayro-write" +version = "0.1.0" +dependencies = [ + "flate2", + "hayro-syntax", + "lazy_static", + "log", + "pdf-writer", +] + [[package]] name = "heck" version = "0.5.0" @@ -1217,8 +1240,8 @@ dependencies = [ "image-webp", "num-traits", "png", - "zune-core", - "zune-jpeg", + "zune-core 0.4.12", + "zune-jpeg 0.4.14", ] [[package]] @@ -1367,7 +1390,6 @@ dependencies = [ [[package]] name = "krilla" version = "0.4.0" -source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" dependencies = [ "base64", "bumpalo", @@ -1376,6 +1398,7 @@ dependencies = [ "float-cmp 0.10.0", "fxhash", "gif", + "hayro-write", "image-webp", "imagesize", "once_cell", @@ -1385,17 +1408,17 @@ dependencies = [ "rustybuzz", "siphasher", "skrifa", + "smallvec", "subsetter", "tiny-skia-path", "xmp-writer", "yoke 0.8.0", - "zune-jpeg", + "zune-jpeg 0.4.14", ] [[package]] name = "krilla-svg" version = "0.1.0" 
-source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" dependencies = [ "flate2", "fontdb", @@ -1416,6 +1439,12 @@ dependencies = [ "smallvec", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.169" @@ -2175,7 +2204,7 @@ dependencies = [ "svgtypes", "tiny-skia", "usvg", - "zune-jpeg", + "zune-jpeg 0.4.14", ] [[package]] @@ -2451,9 +2480,9 @@ dependencies = [ [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "spin" @@ -3075,6 +3104,7 @@ dependencies = [ "fontdb", "glidesort", "hayagriva", + "hayro-syntax", "icu_properties", "icu_provider", "icu_provider_blob", @@ -3970,11 +4000,24 @@ version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" +[[package]] +name = "zune-core" +version = "0.5.0-rc2" +source = "git+https://github.com/etemesi254/zune-image?rev=ac43af3#ac43af36e7125c120d64392c3eee52528c24b5c4" + [[package]] name = "zune-jpeg" version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99a5bab8d7dedf81405c4bb1f2b83ea057643d9cb28778cea9eecddeedd2e028" dependencies = [ - "zune-core", + "zune-core 0.4.12", +] + +[[package]] +name = "zune-jpeg" +version = "0.5.0-rc5" +source = "git+https://github.com/etemesi254/zune-image?rev=ac43af3#ac43af36e7125c120d64392c3eee52528c24b5c4" +dependencies = [ + "zune-core 0.5.0-rc2", ] diff --git a/Cargo.toml b/Cargo.toml index 63ea32b94..0087406e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-61,6 +61,7 @@ fontdb = { version = "0.23", default-features = false } fs_extra = "1.3" glidesort = "0.1.2" hayagriva = "0.8.1" +hayro-syntax = { path = "../hayro/hayro-syntax" } heck = "0.5" hypher = "0.1.4" icu_properties = { version = "1.4", features = ["serde"] } @@ -73,8 +74,8 @@ image = { version = "0.25.5", default-features = false, features = ["png", "jpeg indexmap = { version = "2", features = ["serde"] } infer = { version = "0.19.0", default-features = false } kamadak-exif = "0.6" -krilla = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe", default-features = false, features = ["raster-images", "comemo", "rayon"] } -krilla-svg = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe" } +krilla = { path = "../krilla/crates/krilla", default-features = false, features = ["raster-images", "comemo", "rayon", "pdf"] } +krilla-svg = { path = "../krilla/crates/krilla-svg" } kurbo = "0.11" libfuzzer-sys = "0.4" lipsum = "0.9" diff --git a/crates/typst-cli/src/args.rs b/crates/typst-cli/src/args.rs index b6b74fd9d..093735cbb 100644 --- a/crates/typst-cli/src/args.rs +++ b/crates/typst-cli/src/args.rs @@ -471,7 +471,7 @@ display_possible_values!(DiagnosticFormat); #[derive(Debug, Copy, Clone, Eq, PartialEq, ValueEnum)] pub enum Feature { Html, - PdfEmbedding + PdfEmbedding, } display_possible_values!(Feature); diff --git a/crates/typst-cli/src/world.rs b/crates/typst-cli/src/world.rs index ccd527d24..ab5216831 100644 --- a/crates/typst-cli/src/world.rs +++ b/crates/typst-cli/src/world.rs @@ -117,7 +117,7 @@ impl SystemWorld { .iter() .map(|&feature| match feature { Feature::Html => typst::Feature::Html, - Feature::PdfEmbedding => typst::Feature::PdfEmbedding + Feature::PdfEmbedding => typst::Feature::PdfEmbedding, }) .collect(); diff --git a/crates/typst-library/Cargo.toml b/crates/typst-library/Cargo.toml index f4b219882..2297f7ca7 100644 --- a/crates/typst-library/Cargo.toml +++ b/crates/typst-library/Cargo.toml @@ -31,6 +31,7 @@ flate2 = { 
workspace = true } fontdb = { workspace = true } glidesort = { workspace = true } hayagriva = { workspace = true } +hayro-syntax = { workspace = true } icu_properties = { workspace = true } icu_provider = { workspace = true } icu_provider_blob = { workspace = true } diff --git a/crates/typst-library/src/lib.rs b/crates/typst-library/src/lib.rs index 54d73115c..edbc182fa 100644 --- a/crates/typst-library/src/lib.rs +++ b/crates/typst-library/src/lib.rs @@ -237,7 +237,7 @@ impl FromIterator for Features { #[non_exhaustive] pub enum Feature { Html, - PdfEmbedding + PdfEmbedding, } /// A group of related standard library definitions. diff --git a/crates/typst-library/src/visualize/image/mod.rs b/crates/typst-library/src/visualize/image/mod.rs index b600ab17e..458787611 100644 --- a/crates/typst-library/src/visualize/image/mod.rs +++ b/crates/typst-library/src/visualize/image/mod.rs @@ -1,8 +1,8 @@ //! Image handling. +mod pdf; mod raster; mod svg; -mod pdf; pub use self::raster::{ ExchangeFormat, PixelEncoding, PixelFormat, RasterFormat, RasterImage, @@ -27,6 +27,7 @@ use crate::layout::{Length, Rel, Sizing}; use crate::loading::{DataSource, Load, LoadSource, Loaded, Readable}; use crate::model::Figurable; use crate::text::{families, LocalName}; +use crate::visualize::image::pdf::{PdfDocument, PdfImage}; /// A raster or vector graphic. /// @@ -127,6 +128,11 @@ pub struct ImageElem { /// A text describing the image. pub alt: Option, + /// The page number that should be embedded as an image. This attribute only has an effect + /// for PDF files. + #[default(1)] + pub page: usize, + /// How the image should adjust itself to a given area (the area is defined /// by the `width` and `height` fields). 
Note that `fit` doesn't visually /// change anything if the area's aspect ratio is the same as the image's @@ -262,6 +268,15 @@ impl Packed { ) .within(loaded)?, ), + ImageFormat::Vector(VectorFormat::Pdf) => { + let document = PdfDocument::new(loaded.data.clone()).within(loaded)?; + // The user provides the page number starting from 1; further down the pipeline, page + // numbers are 0-based. + let pdf_image = + PdfImage::new(document, self.page.get(styles) - 1).within(loaded)?; + + ImageKind::Pdf(pdf_image) + } }; Ok(Image::new(kind, self.alt.get_cloned(styles), self.scaling.get(styles))) @@ -287,6 +302,7 @@ impl Packed { "jpg" | "jpeg" => return Ok(ExchangeFormat::Jpg.into()), "gif" => return Ok(ExchangeFormat::Gif.into()), "svg" | "svgz" => return Ok(VectorFormat::Svg.into()), + "pdf" => return Ok(VectorFormat::Pdf.into()), "webp" => return Ok(ExchangeFormat::Webp.into()), _ => {} } @@ -374,6 +390,7 @@ impl Image { match &self.0.kind { ImageKind::Raster(raster) => raster.format().into(), ImageKind::Svg(_) => VectorFormat::Svg.into(), + ImageKind::Pdf(_) => VectorFormat::Pdf.into(), } } @@ -382,6 +399,7 @@ impl Image { match &self.0.kind { ImageKind::Raster(raster) => raster.width() as f64, ImageKind::Svg(svg) => svg.width(), + ImageKind::Pdf(pdf) => pdf.width() as f64, } } @@ -390,6 +408,7 @@ impl Image { match &self.0.kind { ImageKind::Raster(raster) => raster.height() as f64, ImageKind::Svg(svg) => svg.height(), + ImageKind::Pdf(pdf) => pdf.height() as f64, } } @@ -398,6 +417,7 @@ impl Image { match &self.0.kind { ImageKind::Raster(raster) => raster.dpi(), ImageKind::Svg(_) => Some(Image::USVG_DEFAULT_DPI), + ImageKind::Pdf(_) => Some(Image::DEFAULT_DPI), } } @@ -436,6 +456,8 @@ pub enum ImageKind { Raster(RasterImage), /// An SVG image. Svg(SvgImage), + /// A PDF image. 
+ Pdf(PdfImage), } impl From for ImageKind { @@ -469,9 +491,9 @@ impl ImageFormat { if is_svg(data) { return Some(Self::Vector(VectorFormat::Svg)); } - + if is_pdf(data) { - return Some(Self::Vector(VectorFormat::Pdf)) + return Some(Self::Vector(VectorFormat::Pdf)); } None diff --git a/crates/typst-library/src/visualize/image/pdf.rs b/crates/typst-library/src/visualize/image/pdf.rs index a9685393e..c19dc3ab6 100644 --- a/crates/typst-library/src/visualize/image/pdf.rs +++ b/crates/typst-library/src/visualize/image/pdf.rs @@ -1,6 +1,87 @@ -use std::sync::Arc; +use crate::diag::LoadResult; use crate::foundations::Bytes; +use hayro_syntax::pdf::Pdf; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; -/// A PDF image. +#[derive(Clone)] +struct DocumentRepr { + pdf: Arc, + data: Bytes, + page_sizes: Vec<(f32, f32)>, +} + +impl Hash for DocumentRepr { + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +/// A PDF document. #[derive(Clone, Hash)] -pub struct PdfImage(Bytes); \ No newline at end of file +pub struct PdfDocument(Arc); + +impl PdfDocument { + /// Load a PDF document. + #[comemo::memoize] + #[typst_macros::time(name = "load pdf document")] + pub fn new(data: Bytes) -> LoadResult { + // TODO: Remove unwraps + let pdf = Arc::new(Pdf::new(Arc::new(data.clone())).unwrap()); + let pages = pdf.pages().unwrap(); + + let page_sizes = pages.get().iter().map(|p| p.render_dimensions()).collect(); + + Ok(Self(Arc::new(DocumentRepr { data, pdf, page_sizes }))) + } +} + +struct ImageRepr { + pub document: PdfDocument, + pub page: usize, + pub width: f32, + pub height: f32, +} + +impl Hash for ImageRepr { + fn hash(&self, state: &mut H) { + self.document.hash(state); + self.page.hash(state); + } +} + +/// A page of a PDF file. +#[derive(Clone, Hash)] +pub struct PdfImage(Arc); + +impl PdfImage { + #[comemo::memoize] + pub fn new(document: PdfDocument, page: usize) -> LoadResult { + // TODO: Don't allow loading if pdf-embedding feature is disabled. 
+ // TODO: Remove Unwrap + let dimensions = *(&document.0).page_sizes.get(page).unwrap(); + + Ok(Self(Arc::new(ImageRepr { + document, + page, + width: dimensions.0, + height: dimensions.1, + }))) + } + + pub fn width(&self) -> f32 { + self.0.width + } + + pub fn height(&self) -> f32 { + self.0.height + } + + pub fn data(&self) -> &Bytes { + &self.0.document.0.data + } + + pub fn page(&self) -> usize { + self.0.page + } +} diff --git a/crates/typst-pdf/src/convert.rs b/crates/typst-pdf/src/convert.rs index 9e2aa87b7..a76091e94 100644 --- a/crates/typst-pdf/src/convert.rs +++ b/crates/typst-pdf/src/convert.rs @@ -363,6 +363,11 @@ fn finish( hint: "convert the image to 8 bit instead" ) } + KrillaError::Pdf(_, e, loc) => { + // TODO: Better errors + let span = to_span(loc); + bail!(span, "failed to process PDF"); + } }, } } @@ -576,6 +581,17 @@ fn convert_error( "{prefix} missing document date"; hint: "set the date of the document" ), + ValidationError::DuplicateTagId(_, loc) => error!( + to_span(*loc), + "{prefix} duplicate tag id"; + hint: "please report this as a bug" + ), + ValidationError::UnknownTagId(_, loc) => error!( + to_span(*loc), + "{prefix} unknown tag id"; + hint: "please report this as a bug" + ), + ValidationError::EmbeddedPDF(loc) => error!(to_span(*loc), "TODO"), } } diff --git a/crates/typst-pdf/src/image.rs b/crates/typst-pdf/src/image.rs index 93bdb1950..6756877cc 100644 --- a/crates/typst-pdf/src/image.rs +++ b/crates/typst-pdf/src/image.rs @@ -3,6 +3,7 @@ use std::sync::{Arc, OnceLock}; use image::{DynamicImage, EncodableLayout, GenericImageView, Rgba}; use krilla::image::{BitsPerComponent, CustomImage, ImageColorspace}; +use krilla::pdf::PdfDocument; use krilla::surface::Surface; use krilla_svg::{SurfaceExt, SvgSettings}; use typst_library::diag::{bail, SourceResult}; @@ -60,6 +61,15 @@ pub(crate) fn handle_image( SvgSettings { embed_text: true, ..Default::default() }, ); } + ImageKind::Pdf(pdf) => { + let pdf_data: Arc + Send + Sync> = + 
Arc::new(pdf.data().clone()); + surface.draw_pdf_page( + &PdfDocument::new(pdf_data.into()).unwrap(), + size.to_krilla(), + pdf.page(), + ) + } } if image.alt().is_some() { diff --git a/crates/typst-render/src/image.rs b/crates/typst-render/src/image.rs index 7425bdd2f..ad9e329df 100644 --- a/crates/typst-render/src/image.rs +++ b/crates/typst-render/src/image.rs @@ -94,6 +94,7 @@ fn build_texture(image: &Image, w: u32, h: u32) -> Option> { ); resvg::render(tree, ts, &mut texture.as_mut()); } + ImageKind::Pdf(_) => todo!(), } Some(Arc::new(texture)) } diff --git a/crates/typst-svg/src/image.rs b/crates/typst-svg/src/image.rs index fd4aecd4f..069e5b10a 100644 --- a/crates/typst-svg/src/image.rs +++ b/crates/typst-svg/src/image.rs @@ -66,6 +66,7 @@ pub fn convert_image_to_base64_url(image: &Image) -> EcoString { }), }, ImageKind::Svg(svg) => ("svg+xml", svg.data()), + ImageKind::Pdf(_) => todo!(), }; let mut url = eco_format!("data:image/{format};base64,");