From ce6975e65a0e9f3b0d625735159583e9860e1fdb Mon Sep 17 00:00:00 2001 From: Tobias Schmitz Date: Thu, 8 May 2025 10:46:28 +0200 Subject: [PATCH] feat: use the infer crate to determine if pdf embeds should be compressed --- Cargo.lock | 27 +++++++++++++++++++++++++++ Cargo.toml | 1 + crates/typst-pdf/Cargo.toml | 1 + crates/typst-pdf/src/embed.rs | 21 ++++++++++++++++++++- 4 files changed, 49 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index ab2d2cc83..731ee422b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -255,6 +255,17 @@ dependencies = [ "shlex", ] +[[package]] +name = "cfb" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d38f2da7a0a2c4ccf0065be06397cc26a81f4e528be095826eee9d4adbb8c60f" +dependencies = [ + "byteorder", + "fnv", + "uuid", +] + [[package]] name = "cfg-if" version = "1.0.0" @@ -1259,6 +1270,15 @@ dependencies = [ "serde", ] +[[package]] +name = "infer" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7" +dependencies = [ + "cfb", +] + [[package]] name = "inotify" version = "0.11.0" @@ -3127,6 +3147,7 @@ dependencies = [ "comemo", "ecow", "image", + "infer", "krilla", "krilla-svg", "serde", @@ -3430,6 +3451,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 12870b809..fb448b669 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -71,6 +71,7 @@ icu_segmenter = { version = "1.4", features = ["serde"] } if_chain = "1" image = { version = "0.25.5", default-features = false, features = ["png", "jpeg", "gif"] } indexmap = { version = "2", features = ["serde"] } +infer = "0.19.0" kamadak-exif = "0.6" krilla = { version = "0.4.0", default-features = false, features = ["raster-images", "comemo", "rayon"] } krilla-svg = "0.1.0" diff --git a/crates/typst-pdf/Cargo.toml b/crates/typst-pdf/Cargo.toml index f6f08b5bc..5745d0530 100644 --- a/crates/typst-pdf/Cargo.toml +++ b/crates/typst-pdf/Cargo.toml @@ -23,6 +23,7 @@ bytemuck = { workspace = true } comemo = { workspace = true } ecow = { workspace = true } image = { workspace = true } +infer = { workspace = true } krilla = { workspace = true } krilla-svg = { workspace = true } serde = { workspace = true } diff --git a/crates/typst-pdf/src/embed.rs b/crates/typst-pdf/src/embed.rs index 6ed65a2b6..b8d9e16b7 100644 --- a/crates/typst-pdf/src/embed.rs +++ b/crates/typst-pdf/src/embed.rs @@ -34,6 +34,7 @@ pub(crate) fn embed_files( }, }; let data: Arc + Send + Sync> = Arc::new(embed.data.clone()); + let compress = should_compress(&embed.data); let file = EmbeddedFile { path, @@ -41,7 +42,7 @@ pub(crate) fn embed_files( description, association_kind, data: data.into(), - compress: true, + compress, location: Some(span.into_raw().get()), }; @@ -52,3 +53,21 @@ pub(crate) fn embed_files( Ok(()) } + +fn should_compress(data: &[u8]) -> bool { + let Some(ty) = infer::get(data) else { + return false; + }; + match ty.matcher_type() { + infer::MatcherType::App => true, + infer::MatcherType::Archive => false, + infer::MatcherType::Audio => false, + infer::MatcherType::Book => true, + infer::MatcherType::Doc => true, + infer::MatcherType::Font => true, + infer::MatcherType::Image => false, + infer::MatcherType::Text => true, + infer::MatcherType::Video => false, + infer::MatcherType::Custom => true, + } +}