From ce6975e65a0e9f3b0d625735159583e9860e1fdb Mon Sep 17 00:00:00 2001
From: Tobias Schmitz
Date: Thu, 8 May 2025 10:46:28 +0200
Subject: [PATCH 1/4] feat: use the infer crate to determine if pdf embeds should be compressed

---
 Cargo.lock                    | 27 +++++++++++++++++++++++++++
 Cargo.toml                    |  1 +
 crates/typst-pdf/Cargo.toml   |  1 +
 crates/typst-pdf/src/embed.rs | 21 ++++++++++++++++++++-
 4 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/Cargo.lock b/Cargo.lock
index ab2d2cc83..731ee422b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -255,6 +255,17 @@ dependencies = [
  "shlex",
 ]
 
+[[package]]
+name = "cfb"
+version = "0.7.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f"
+dependencies = [
+ "byteorder",
+ "fnv",
+ "uuid",
+]
+
 [[package]]
 name = "cfg-if"
 version = "1.0.0"
@@ -1259,6 +1270,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "infer"
+version = "0.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7"
+dependencies = [
+ "cfb",
+]
+
 [[package]]
 name = "inotify"
 version = "0.11.0"
@@ -3127,6 +3147,7 @@ dependencies = [
  "comemo",
  "ecow",
  "image",
+ "infer",
  "krilla",
  "krilla-svg",
  "serde",
@@ -3430,6 +3451,12 @@ version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
+[[package]]
+name = "uuid"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9"
+
 [[package]]
 name = "vcpkg"
 version = "0.2.15"
diff --git a/Cargo.toml b/Cargo.toml
index 12870b809..fb448b669 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -71,6 +71,7 @@ icu_segmenter = { version = "1.4", features = ["serde"] }
 if_chain = "1"
 image = { version = "0.25.5", default-features = false, features = ["png", "jpeg", "gif"] }
 indexmap = { version = "2", features = ["serde"] }
+infer = "0.19.0"
 kamadak-exif = "0.6"
 krilla = { version = "0.4.0", default-features = false, features = ["raster-images", "comemo", "rayon"] }
 krilla-svg = "0.1.0"
diff --git a/crates/typst-pdf/Cargo.toml b/crates/typst-pdf/Cargo.toml
index f6f08b5bc..5745d0530 100644
--- a/crates/typst-pdf/Cargo.toml
+++ b/crates/typst-pdf/Cargo.toml
@@ -23,6 +23,7 @@ bytemuck = { workspace = true }
 comemo = { workspace = true }
 ecow = { workspace = true }
 image = { workspace = true }
+infer = { workspace = true }
 krilla = { workspace = true }
 krilla-svg = { workspace = true }
 serde = { workspace = true }
diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs
index 6ed65a2b6..b8d9e16b7 100644
--- a/crates/typst-pdf/src/embed.rs
+++ b/crates/typst-pdf/src/embed.rs
@@ -34,6 +34,7 @@ pub(crate) fn embed_files(
             },
         };
         let data: Arc<dyn AsRef<[u8]> + Send + Sync> = Arc::new(embed.data.clone());
+        let compress = should_compress(&embed.data);
 
         let file = EmbeddedFile {
             path,
@@ -41,7 +42,7 @@ pub(crate) fn embed_files(
             description,
             association_kind,
             data: data.into(),
-            compress: true,
+            compress,
             location: Some(span.into_raw().get()),
         };
 
@@ -52,3 +53,21 @@ pub(crate) fn embed_files(
 
     Ok(())
 }
+
+fn should_compress(data: &[u8]) -> bool {
+    let Some(ty) = infer::get(data) else {
+        return false;
+    };
+    match ty.matcher_type() {
+        infer::MatcherType::App => true,
+        infer::MatcherType::Archive => false,
+        infer::MatcherType::Audio => false,
+        infer::MatcherType::Book => true,
+        infer::MatcherType::Doc => true,
+        infer::MatcherType::Font => true,
+        infer::MatcherType::Image => false,
+        infer::MatcherType::Text => true,
+        infer::MatcherType::Video => false,
+        infer::MatcherType::Custom => true,
+    }
+}

From c8dfb7b9ec56daf95a11b3db8ed273417b4531ff Mon Sep 17 00:00:00 2001
From: Tobias Schmitz
Date: Thu, 8 May 2025 15:36:31 +0200
Subject: [PATCH 2/4] fix: compress pdf embeds when the type is unknown

---
 crates/typst-pdf/src/embed.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs
index b8d9e16b7..4130bc73f 100644
--- a/crates/typst-pdf/src/embed.rs
+++ b/crates/typst-pdf/src/embed.rs
@@ -56,7 +56,7 @@ pub(crate) fn embed_files(
 
 fn should_compress(data: &[u8]) -> bool {
     let Some(ty) = infer::get(data) else {
-        return false;
+        return true;
     };
     match ty.matcher_type() {
         infer::MatcherType::App => true,

From 0cef699f3eceaf5ab24a5badf9d934f9bbd98604 Mon Sep 17 00:00:00 2001
From: Tobias Schmitz
Date: Thu, 8 May 2025 15:36:55 +0200
Subject: [PATCH 3/4] feat: opt out of pdf embed compression by mime type

---
 crates/typst-pdf/src/embed.rs | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 4 deletions(-)

diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs
index 4130bc73f..0199d8e96 100644
--- a/crates/typst-pdf/src/embed.rs
+++ b/crates/typst-pdf/src/embed.rs
@@ -60,14 +60,64 @@ fn should_compress(data: &[u8]) -> bool {
     };
     match ty.matcher_type() {
         infer::MatcherType::App => true,
-        infer::MatcherType::Archive => false,
-        infer::MatcherType::Audio => false,
+        infer::MatcherType::Archive => match ty.mime_type() {
+            #[rustfmt::skip]
+            "application/zip"
+            | "application/vnd.rar"
+            | "application/gzip"
+            | "application/x-bzip2"
+            | "application/vnd.bzip3"
+            | "application/x-7z-compressed"
+            | "application/x-xz"
+            | "application/vnd.ms-cab-compressed"
+            | "application/vnd.debian.binary-package"
+            | "application/x-compress"
+            | "application/x-lzip"
+            | "application/x-rpm"
+            | "application/zstd"
+            | "application/x-lz4"
+            | "application/x-ole-storage" => false,
+            _ => true,
+        },
+        infer::MatcherType::Audio => match ty.mime_type() {
+            #[rustfmt::skip]
+            "audio/mpeg"
+            | "audio/m4a"
+            | "audio/opus"
+            | "audio/ogg"
+            | "audio/x-flac"
+            | "audio/amr"
+            | "audio/aac"
+            | "audio/x-ape" => false,
+            _ => true,
+        },
         infer::MatcherType::Book => true,
         infer::MatcherType::Doc => true,
         infer::MatcherType::Font => true,
-        infer::MatcherType::Image => false,
+        infer::MatcherType::Image => match ty.mime_type() {
+            #[rustfmt::skip]
+            "image/jpeg"
+            | "image/jp2"
+            | "image/png"
+            | "image/webp"
+            | "image/vnd.ms-photo"
+            | "image/heif"
+            | "image/avif"
+            | "image/jxl"
+            | "image/vnd.djvu" => false,
+            _ => true,
+        },
         infer::MatcherType::Text => true,
-        infer::MatcherType::Video => false,
+        infer::MatcherType::Video => match ty.mime_type() {
+            #[rustfmt::skip]
+            "video/mp4"
+            | "video/x-m4v"
+            | "video/x-matroska"
+            | "video/webm"
+            | "video/quicktime"
+            | "video/x-flv" => false,
+            _ => true,
+        },
         infer::MatcherType::Custom => true,
     }
 }

From cd659bf460e820ac1c2eadacd34e007d135294d8 Mon Sep 17 00:00:00 2001
From: Tobias Schmitz
Date: Sat, 10 May 2025 14:28:34 +0200
Subject: [PATCH 4/4] refactor: prepare for automatic compression of embedded files in krilla

---
Reviewer note: `should_compress` now returns `Some(false)` for formats that
are already compressed and `None` when it has no opinion; the caller maps
`None` to "compress" via `unwrap_or(true)`. The listed image formats return
`Some(false)` like the archive, audio and video arms, so this refactor keeps
the behaviour introduced in patch 3/4 instead of recompressing them.

 crates/typst-pdf/src/embed.rs | 37 ++++++++++++++++++-------------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs
index 0199d8e96..f0cd9060a 100644
--- a/crates/typst-pdf/src/embed.rs
+++ b/crates/typst-pdf/src/embed.rs
@@ -34,7 +34,8 @@ pub(crate) fn embed_files(
             },
         };
         let data: Arc<dyn AsRef<[u8]> + Send + Sync> = Arc::new(embed.data.clone());
-        let compress = should_compress(&embed.data);
+        // TODO: update when new krilla version lands (https://github.com/LaurenzV/krilla/pull/203)
+        let compress = should_compress(&embed.data).unwrap_or(true);
 
         let file = EmbeddedFile {
             path,
@@ -54,12 +55,10 @@ pub(crate) fn embed_files(
     Ok(())
 }
 
-fn should_compress(data: &[u8]) -> bool {
-    let Some(ty) = infer::get(data) else {
-        return true;
-    };
+fn should_compress(data: &[u8]) -> Option<bool> {
+    let ty = infer::get(data)?;
     match ty.matcher_type() {
-        infer::MatcherType::App => true,
+        infer::MatcherType::App => None,
         infer::MatcherType::Archive => match ty.mime_type() {
             #[rustfmt::skip]
             "application/zip"
@@ -76,8 +75,8 @@ fn should_compress(data: &[u8]) -> bool {
             | "application/x-rpm"
             | "application/zstd"
             | "application/x-lz4"
-            | "application/x-ole-storage" => false,
-            _ => true,
+            | "application/x-ole-storage" => Some(false),
+            _ => None,
         },
         infer::MatcherType::Audio => match ty.mime_type() {
             #[rustfmt::skip]
@@ -88,12 +87,12 @@ fn should_compress(data: &[u8]) -> bool {
             | "audio/x-flac"
             | "audio/amr"
             | "audio/aac"
-            | "audio/x-ape" => false,
-            _ => true,
+            | "audio/x-ape" => Some(false),
+            _ => None,
         },
-        infer::MatcherType::Book => true,
-        infer::MatcherType::Doc => true,
-        infer::MatcherType::Font => true,
+        infer::MatcherType::Book => None,
+        infer::MatcherType::Doc => None,
+        infer::MatcherType::Font => None,
         infer::MatcherType::Image => match ty.mime_type() {
             #[rustfmt::skip]
             "image/jpeg"
@@ -104,10 +103,10 @@ fn should_compress(data: &[u8]) -> bool {
             | "image/heif"
             | "image/avif"
             | "image/jxl"
-            | "image/vnd.djvu" => false,
-            _ => true,
+            | "image/vnd.djvu" => Some(false),
+            _ => None,
         },
-        infer::MatcherType::Text => true,
+        infer::MatcherType::Text => None,
         infer::MatcherType::Video => match ty.mime_type() {
             #[rustfmt::skip]
             "video/mp4"
@@ -115,9 +114,9 @@ fn should_compress(data: &[u8]) -> bool {
             | "video/x-matroska"
             | "video/webm"
             | "video/quicktime"
-            | "video/x-flv" => false,
-            _ => true,
+            | "video/x-flv" => Some(false),
+            _ => None,
         },
-        infer::MatcherType::Custom => true,
+        infer::MatcherType::Custom => None,
     }
 }