Use the infer crate to determine if pdf embeds should be compressed (#6256)

This commit is contained in:
Tobias Schmitz 2025-05-12 10:07:43 +02:00 committed by GitHub
parent 54c5113a83
commit 26c19a49c8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 78 additions and 1 deletions

7
Cargo.lock generated
View File

@ -1259,6 +1259,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "infer"
version = "0.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a588916bfdfd92e71cacef98a63d9b1f0d74d6599980d11894290e7ddefffcf7"
[[package]] [[package]]
name = "inotify" name = "inotify"
version = "0.11.0" version = "0.11.0"
@ -3127,6 +3133,7 @@ dependencies = [
"comemo", "comemo",
"ecow", "ecow",
"image", "image",
"infer",
"krilla", "krilla",
"krilla-svg", "krilla-svg",
"serde", "serde",

View File

@ -71,6 +71,7 @@ icu_segmenter = { version = "1.4", features = ["serde"] }
if_chain = "1" if_chain = "1"
image = { version = "0.25.5", default-features = false, features = ["png", "jpeg", "gif"] } image = { version = "0.25.5", default-features = false, features = ["png", "jpeg", "gif"] }
indexmap = { version = "2", features = ["serde"] } indexmap = { version = "2", features = ["serde"] }
infer = { version = "0.19.0", default-features = false }
kamadak-exif = "0.6" kamadak-exif = "0.6"
krilla = { version = "0.4.0", default-features = false, features = ["raster-images", "comemo", "rayon"] } krilla = { version = "0.4.0", default-features = false, features = ["raster-images", "comemo", "rayon"] }
krilla-svg = "0.1.0" krilla-svg = "0.1.0"

View File

@ -23,6 +23,7 @@ bytemuck = { workspace = true }
comemo = { workspace = true } comemo = { workspace = true }
ecow = { workspace = true } ecow = { workspace = true }
image = { workspace = true } image = { workspace = true }
infer = { workspace = true }
krilla = { workspace = true } krilla = { workspace = true }
krilla-svg = { workspace = true } krilla-svg = { workspace = true }
serde = { workspace = true } serde = { workspace = true }

View File

@ -34,6 +34,8 @@ pub(crate) fn embed_files(
}, },
}; };
let data: Arc<dyn AsRef<[u8]> + Send + Sync> = Arc::new(embed.data.clone()); let data: Arc<dyn AsRef<[u8]> + Send + Sync> = Arc::new(embed.data.clone());
// TODO: update when new krilla version lands (https://github.com/LaurenzV/krilla/pull/203)
let compress = should_compress(&embed.data).unwrap_or(true);
let file = EmbeddedFile { let file = EmbeddedFile {
path, path,
@ -41,7 +43,7 @@ pub(crate) fn embed_files(
description, description,
association_kind, association_kind,
data: data.into(), data: data.into(),
compress: true, compress,
location: Some(span.into_raw().get()), location: Some(span.into_raw().get()),
}; };
@ -52,3 +54,69 @@ pub(crate) fn embed_files(
Ok(()) Ok(())
} }
/// Decides whether the bytes of an embedded file are worth compressing.
///
/// The file type is sniffed from the content of `data` via the `infer` crate.
///
/// Returns `Some(false)` when the detected MIME type is one of the listed
/// formats, which are typically already compressed, so compressing them again
/// would presumably cost time without shrinking the output. Returns `None`
/// when the type could not be detected or when no decision is made for it;
/// the caller then applies its own default.
fn should_compress(data: &[u8]) -> Option<bool> {
    let ty = infer::get(data)?;
    match ty.matcher_type() {
        infer::MatcherType::Archive => match ty.mime_type() {
            #[rustfmt::skip]
            "application/zip"
            | "application/vnd.rar"
            | "application/gzip"
            | "application/x-bzip2"
            | "application/vnd.bzip3"
            | "application/x-7z-compressed"
            | "application/x-xz"
            | "application/vnd.ms-cab-compressed"
            | "application/vnd.debian.binary-package"
            | "application/x-compress"
            | "application/x-lzip"
            | "application/x-rpm"
            | "application/zstd"
            | "application/x-lz4"
            | "application/x-ole-storage" => Some(false),
            _ => None,
        },
        infer::MatcherType::Audio => match ty.mime_type() {
            #[rustfmt::skip]
            "audio/mpeg"
            | "audio/m4a"
            | "audio/opus"
            | "audio/ogg"
            | "audio/x-flac"
            | "audio/amr"
            | "audio/aac"
            | "audio/x-ape" => Some(false),
            _ => None,
        },
        infer::MatcherType::Image => match ty.mime_type() {
            // These image formats carry their own compression, so they are
            // handled like the archive, audio and video formats. Previously
            // this arm returned `None`, which made the list ineffective.
            #[rustfmt::skip]
            "image/jpeg"
            | "image/jp2"
            | "image/png"
            | "image/webp"
            | "image/vnd.ms-photo"
            | "image/heif"
            | "image/avif"
            | "image/jxl"
            | "image/vnd.djvu" => Some(false),
            _ => None,
        },
        infer::MatcherType::Video => match ty.mime_type() {
            #[rustfmt::skip]
            "video/mp4"
            | "video/x-m4v"
            | "video/x-matroska"
            | "video/webm"
            | "video/quicktime"
            | "video/x-flv" => Some(false),
            _ => None,
        },
        // No decision for these categories. They are spelled out instead of
        // using a wildcard so that a new `MatcherType` variant fails to
        // compile here and forces an explicit choice.
        infer::MatcherType::App
        | infer::MatcherType::Book
        | infer::MatcherType::Doc
        | infer::MatcherType::Font
        | infer::MatcherType::Text
        | infer::MatcherType::Custom => None,
    }
}