Compare commits

...

7 Commits

Author SHA1 Message Date
Myriad-Dreamin
67a260837b
Merge eaf63ca80cdd13b6ef801262ab1b47c82dc0fd4a into af2253ba16dfdc731e787e3a43a6f6a63ea65e0a 2025-07-22 13:21:29 +02:00
Laurenz Stampfl
af2253ba16
Add support for PDF embedding (#6623)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
2025-07-22 11:06:44 +00:00
Myriad-Dreamin
eaf63ca80c Remove comment 2025-06-03 02:06:53 +08:00
Myriad-Dreamin
e4c316f2cc Add tests 2025-05-28 22:19:59 +08:00
Myriad-Dreamin
1130d6b746 Core Impl 2025-05-28 22:19:56 +08:00
Myriad-Dreamin
1b44fea9d8 Add egotree 2025-05-28 22:00:33 +08:00
Myriad-Dreamin
8311997274 Add scraper 2025-05-28 22:00:33 +08:00
19 changed files with 842 additions and 63 deletions

341
Cargo.lock generated
View File

@ -181,9 +181,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.8.0" version = "2.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36" checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
dependencies = [ dependencies = [
"serde", "serde",
] ]
@ -214,9 +214,9 @@ checksum = "64fa3c856b712db6612c019f14756e64e4bcea13337a6b33b696333a9eaa2d06"
[[package]] [[package]]
name = "bytemuck" name = "bytemuck"
version = "1.21.0" version = "1.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3" checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
dependencies = [ dependencies = [
"bytemuck_derive", "bytemuck_derive",
] ]
@ -545,6 +545,29 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
[[package]]
name = "cssparser"
version = "0.34.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3"
dependencies = [
"cssparser-macros",
"dtoa-short",
"itoa",
"phf",
"smallvec",
]
[[package]]
name = "cssparser-macros"
version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
dependencies = [
"quote",
"syn",
]
[[package]] [[package]]
name = "csv" name = "csv"
version = "1.3.1" version = "1.3.1"
@ -592,6 +615,17 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "derive_more"
version = "0.99.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "dirs" name = "dirs"
version = "6.0.0" version = "6.0.0"
@ -630,6 +664,21 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
[[package]]
name = "dtoa"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
[[package]]
name = "dtoa-short"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
dependencies = [
"dtoa",
]
[[package]] [[package]]
name = "ecow" name = "ecow"
version = "0.2.3" version = "0.2.3"
@ -639,6 +688,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "ego-tree"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
[[package]] [[package]]
name = "either" name = "either"
version = "1.13.0" version = "1.13.0"
@ -861,6 +916,16 @@ version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
[[package]]
name = "futf"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
dependencies = [
"mac",
"new_debug_unreachable",
]
[[package]] [[package]]
name = "fxhash" name = "fxhash"
version = "0.2.1" version = "0.2.1"
@ -964,12 +1029,87 @@ dependencies = [
"url", "url",
] ]
[[package]]
name = "hayro"
version = "0.1.0"
source = "git+https://github.com/LaurenzV/hayro?rev=e701f95#e701f9569157a2fe4ade68930dc9e9283782dcca"
dependencies = [
"bytemuck",
"hayro-interpret",
"image",
"kurbo",
"rustc-hash",
"smallvec",
]
[[package]]
name = "hayro-font"
version = "0.1.0"
source = "git+https://github.com/LaurenzV/hayro?rev=e701f95#e701f9569157a2fe4ade68930dc9e9283782dcca"
dependencies = [
"log",
"phf",
]
[[package]]
name = "hayro-interpret"
version = "0.1.0"
source = "git+https://github.com/LaurenzV/hayro?rev=e701f95#e701f9569157a2fe4ade68930dc9e9283782dcca"
dependencies = [
"bitflags 2.9.1",
"hayro-font",
"hayro-syntax",
"kurbo",
"log",
"phf",
"qcms",
"skrifa",
"smallvec",
"yoke 0.8.0",
]
[[package]]
name = "hayro-syntax"
version = "0.0.1"
source = "git+https://github.com/LaurenzV/hayro?rev=e701f95#e701f9569157a2fe4ade68930dc9e9283782dcca"
dependencies = [
"flate2",
"kurbo",
"log",
"rustc-hash",
"smallvec",
"zune-jpeg",
]
[[package]]
name = "hayro-write"
version = "0.1.0"
source = "git+https://github.com/LaurenzV/hayro?rev=e701f95#e701f9569157a2fe4ade68930dc9e9283782dcca"
dependencies = [
"flate2",
"hayro-syntax",
"log",
"pdf-writer",
]
[[package]] [[package]]
name = "heck" name = "heck"
version = "0.5.0" version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "html5ever"
version = "0.29.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c"
dependencies = [
"log",
"mac",
"markup5ever",
"match_token",
]
[[package]] [[package]]
name = "httpdate" name = "httpdate"
version = "1.0.3" version = "1.0.3"
@ -1200,9 +1340,9 @@ dependencies = [
[[package]] [[package]]
name = "image" name = "image"
version = "0.25.5" version = "0.25.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd6f44aed642f18953a158afeb30206f4d50da59fbc66ecb53c66488de73563b" checksum = "db35664ce6b9810857a38a906215e75a9c879f0696556a39f59c62829710251a"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder-lite", "byteorder-lite",
@ -1265,7 +1405,7 @@ version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3" checksum = "f37dccff2791ab604f9babef0ba14fbe0be30bd368dc541e2b08d07c8aa908f3"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"inotify-sys", "inotify-sys",
"libc", "libc",
] ]
@ -1361,7 +1501,7 @@ dependencies = [
[[package]] [[package]]
name = "krilla" name = "krilla"
version = "0.4.0" version = "0.4.0"
source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" source = "git+https://github.com/LaurenzV/krilla?rev=37b9a00#37b9a00bfac87ed0b347b7cf8e9d37a6f68fcccd"
dependencies = [ dependencies = [
"base64", "base64",
"bumpalo", "bumpalo",
@ -1370,6 +1510,7 @@ dependencies = [
"float-cmp 0.10.0", "float-cmp 0.10.0",
"fxhash", "fxhash",
"gif", "gif",
"hayro-write",
"image-webp", "image-webp",
"imagesize", "imagesize",
"once_cell", "once_cell",
@ -1379,6 +1520,7 @@ dependencies = [
"rustybuzz", "rustybuzz",
"siphasher", "siphasher",
"skrifa", "skrifa",
"smallvec",
"subsetter", "subsetter",
"tiny-skia-path", "tiny-skia-path",
"xmp-writer", "xmp-writer",
@ -1389,7 +1531,7 @@ dependencies = [
[[package]] [[package]]
name = "krilla-svg" name = "krilla-svg"
version = "0.1.0" version = "0.1.0"
source = "git+https://github.com/LaurenzV/krilla?rev=20c14fe#20c14fefee5002566b3d6668b338bbe2168784e7" source = "git+https://github.com/LaurenzV/krilla?rev=37b9a00#37b9a00bfac87ed0b347b7cf8e9d37a6f68fcccd"
dependencies = [ dependencies = [
"flate2", "flate2",
"fontdb", "fontdb",
@ -1402,9 +1544,9 @@ dependencies = [
[[package]] [[package]]
name = "kurbo" name = "kurbo"
version = "0.11.1" version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89234b2cc610a7dd927ebde6b41dd1a5d4214cffaef4cf1fb2195d592f92518f" checksum = "1077d333efea6170d9ccb96d3c3026f300ca0773da4938cc4c811daa6df68b0c"
dependencies = [ dependencies = [
"arrayvec", "arrayvec",
"smallvec", "smallvec",
@ -1456,7 +1598,7 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"libc", "libc",
"redox_syscall", "redox_syscall",
] ]
@ -1534,6 +1676,37 @@ dependencies = [
"pkg-config", "pkg-config",
] ]
[[package]]
name = "mac"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
[[package]]
name = "markup5ever"
version = "0.14.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18"
dependencies = [
"log",
"phf",
"phf_codegen",
"string_cache",
"string_cache_codegen",
"tendril",
]
[[package]]
name = "match_token"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]] [[package]]
name = "memchr" name = "memchr"
version = "2.7.4" version = "2.7.4"
@ -1606,6 +1779,12 @@ dependencies = [
"tempfile", "tempfile",
] ]
[[package]]
name = "new_debug_unreachable"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
[[package]] [[package]]
name = "nom" name = "nom"
version = "7.1.3" version = "7.1.3"
@ -1622,7 +1801,7 @@ version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943" checksum = "2fee8403b3d66ac7b26aee6e40a897d85dc5ce26f44da36b8b73e987cc52e943"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"filetime", "filetime",
"fsevent-sys", "fsevent-sys",
"inotify", "inotify",
@ -1704,7 +1883,7 @@ version = "0.10.72"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"cfg-if", "cfg-if",
"foreign-types", "foreign-types",
"libc", "libc",
@ -1841,7 +2020,7 @@ version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ea27c5015ab81753fc61e49f8cde74999346605ee148bb20008ef3d3150e0dc" checksum = "3ea27c5015ab81753fc61e49f8cde74999346605ee148bb20008ef3d3150e0dc"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"itoa", "itoa",
"memchr", "memchr",
"ryu", "ryu",
@ -1863,6 +2042,16 @@ dependencies = [
"phf_shared", "phf_shared",
] ]
[[package]]
name = "phf_codegen"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator",
"phf_shared",
]
[[package]] [[package]]
name = "phf_generator" name = "phf_generator"
version = "0.11.3" version = "0.11.3"
@ -1975,6 +2164,12 @@ dependencies = [
"zerocopy", "zerocopy",
] ]
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]] [[package]]
name = "proc-macro2" name = "proc-macro2"
version = "1.0.93" version = "1.0.93"
@ -1999,7 +2194,7 @@ version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"getopts", "getopts",
"memchr", "memchr",
"unicase", "unicase",
@ -2112,7 +2307,7 @@ version = "0.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834" checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
] ]
[[package]] [[package]]
@ -2215,7 +2410,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@ -2234,7 +2429,7 @@ version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd3c7c96f8a08ee34eff8857b11b49b07d71d1c3f4e88f8a88d4c9e9f90b1702" checksum = "fd3c7c96f8a08ee34eff8857b11b49b07d71d1c3f4e88f8a88d4c9e9f90b1702"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"bytemuck", "bytemuck",
"core_maths", "core_maths",
"log", "log",
@ -2276,13 +2471,28 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "scraper"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2"
dependencies = [
"cssparser",
"ego-tree",
"getopts",
"html5ever",
"precomputed-hash",
"selectors",
"tendril",
]
[[package]] [[package]]
name = "security-framework" name = "security-framework"
version = "2.11.1" version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"core-foundation", "core-foundation",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
@ -2299,6 +2509,25 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "selectors"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [
"bitflags 2.8.0",
"cssparser",
"derive_more",
"fxhash",
"log",
"new_debug_unreachable",
"phf",
"phf_codegen",
"precomputed-hash",
"servo_arc",
"smallvec",
]
[[package]] [[package]]
name = "self-replace" name = "self-replace"
version = "1.5.0" version = "1.5.0"
@ -2382,6 +2611,15 @@ dependencies = [
"unsafe-libyaml", "unsafe-libyaml",
] ]
[[package]]
name = "servo_arc"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a"
dependencies = [
"stable_deref_trait",
]
[[package]] [[package]]
name = "shell-escape" name = "shell-escape"
version = "0.1.5" version = "0.1.5"
@ -2445,9 +2683,9 @@ dependencies = [
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.13.2" version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]] [[package]]
name = "spin" name = "spin"
@ -2493,6 +2731,31 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "string_cache"
version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared",
"precomputed-hash",
"serde",
]
[[package]]
name = "string_cache_codegen"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro2",
"quote",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.11.1" version = "0.11.1"
@ -2615,6 +2878,17 @@ dependencies = [
"windows-sys 0.59.0", "windows-sys 0.59.0",
] ]
[[package]]
name = "tendril"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
dependencies = [
"futf",
"mac",
"utf-8",
]
[[package]] [[package]]
name = "termcolor" name = "termcolor"
version = "1.4.1" version = "1.4.1"
@ -2855,7 +3129,7 @@ dependencies = [
[[package]] [[package]]
name = "typst-assets" name = "typst-assets"
version = "0.13.1" version = "0.13.1"
source = "git+https://github.com/typst/typst-assets?rev=edf0d64#edf0d648376e29738a05a933af9ea99bb81557b1" source = "git+https://github.com/typst/typst-assets?rev=fbf00f9#fbf00f9539fdb0825bef4d39fb57d5986c51b756"
[[package]] [[package]]
name = "typst-cli" name = "typst-cli"
@ -2905,7 +3179,7 @@ dependencies = [
[[package]] [[package]]
name = "typst-dev-assets" name = "typst-dev-assets"
version = "0.13.1" version = "0.13.1"
source = "git+https://github.com/typst/typst-dev-assets?rev=bfa947f#bfa947f3433d7d13a995168c40ae788a2ebfe648" source = "git+https://github.com/typst/typst-dev-assets?rev=c6c2acf#c6c2acf6cdc31f99a23a478d3d614f8bf806a4f5"
[[package]] [[package]]
name = "typst-docs" name = "typst-docs"
@ -3055,7 +3329,7 @@ name = "typst-library"
version = "0.13.1" version = "0.13.1"
dependencies = [ dependencies = [
"az", "az",
"bitflags 2.8.0", "bitflags 2.9.1",
"bumpalo", "bumpalo",
"chinese-number", "chinese-number",
"ciborium", "ciborium",
@ -3063,10 +3337,12 @@ dependencies = [
"comemo", "comemo",
"csv", "csv",
"ecow", "ecow",
"ego-tree",
"flate2", "flate2",
"fontdb", "fontdb",
"glidesort", "glidesort",
"hayagriva", "hayagriva",
"hayro-syntax",
"icu_properties", "icu_properties",
"icu_provider", "icu_provider",
"icu_provider_blob", "icu_provider_blob",
@ -3086,6 +3362,7 @@ dependencies = [
"roxmltree", "roxmltree",
"rust_decimal", "rust_decimal",
"rustybuzz", "rustybuzz",
"scraper",
"serde", "serde",
"serde_json", "serde_json",
"serde_yaml 0.9.34+deprecated", "serde_yaml 0.9.34+deprecated",
@ -3165,11 +3442,13 @@ version = "0.13.1"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"comemo", "comemo",
"hayro",
"image", "image",
"pixglyph", "pixglyph",
"resvg", "resvg",
"tiny-skia", "tiny-skia",
"ttf-parser", "ttf-parser",
"typst-assets",
"typst-library", "typst-library",
"typst-macros", "typst-macros",
"typst-timing", "typst-timing",
@ -3183,8 +3462,10 @@ dependencies = [
"comemo", "comemo",
"ecow", "ecow",
"flate2", "flate2",
"hayro",
"image", "image",
"ttf-parser", "ttf-parser",
"typst-assets",
"typst-library", "typst-library",
"typst-macros", "typst-macros",
"typst-timing", "typst-timing",
@ -3418,6 +3699,12 @@ dependencies = [
"xmlwriter", "xmlwriter",
] ]
[[package]]
name = "utf-8"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]] [[package]]
name = "utf16_iter" name = "utf16_iter"
version = "1.0.5" version = "1.0.5"
@ -3581,7 +3868,7 @@ version = "0.221.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9845c470a2e10b61dd42c385839cdd6496363ed63b5c9e420b5488b77bd22083" checksum = "9845c470a2e10b61dd42c385839cdd6496363ed63b5c9e420b5488b77bd22083"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
"indexmap 2.7.1", "indexmap 2.7.1",
] ]
@ -3716,7 +4003,7 @@ version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c" checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [ dependencies = [
"bitflags 2.8.0", "bitflags 2.9.1",
] ]
[[package]] [[package]]

View File

@ -32,8 +32,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.13.1" }
typst-syntax = { path = "crates/typst-syntax", version = "0.13.1" } typst-syntax = { path = "crates/typst-syntax", version = "0.13.1" }
typst-timing = { path = "crates/typst-timing", version = "0.13.1" } typst-timing = { path = "crates/typst-timing", version = "0.13.1" }
typst-utils = { path = "crates/typst-utils", version = "0.13.1" } typst-utils = { path = "crates/typst-utils", version = "0.13.1" }
typst-assets = { git = "https://github.com/typst/typst-assets", rev = "edf0d64" } typst-assets = { git = "https://github.com/typst/typst-assets", rev = "fbf00f9" }
typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "bfa947f" } typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "c6c2acf" }
arrayvec = "0.7.4" arrayvec = "0.7.4"
az = "1.2" az = "1.2"
base64 = "0.22" base64 = "0.22"
@ -54,6 +54,7 @@ csv = "1"
ctrlc = "3.4.1" ctrlc = "3.4.1"
dirs = "6" dirs = "6"
ecow = { version = "0.2", features = ["serde"] } ecow = { version = "0.2", features = ["serde"] }
ego-tree = "0.10"
env_proxy = "0.4" env_proxy = "0.4"
fastrand = "2.3" fastrand = "2.3"
flate2 = "1" flate2 = "1"
@ -61,6 +62,8 @@ fontdb = { version = "0.23", default-features = false }
fs_extra = "1.3" fs_extra = "1.3"
glidesort = "0.1.2" glidesort = "0.1.2"
hayagriva = "0.8.1" hayagriva = "0.8.1"
hayro-syntax = { git = "https://github.com/LaurenzV/hayro", rev = "e701f95" }
hayro = { git = "https://github.com/LaurenzV/hayro", rev = "e701f95" }
heck = "0.5" heck = "0.5"
hypher = "0.1.4" hypher = "0.1.4"
icu_properties = { version = "1.4", features = ["serde"] } icu_properties = { version = "1.4", features = ["serde"] }
@ -72,8 +75,8 @@ image = { version = "0.25.5", default-features = false, features = ["png", "jpeg
indexmap = { version = "2", features = ["serde"] } indexmap = { version = "2", features = ["serde"] }
infer = { version = "0.19.0", default-features = false } infer = { version = "0.19.0", default-features = false }
kamadak-exif = "0.6" kamadak-exif = "0.6"
krilla = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe", default-features = false, features = ["raster-images", "comemo", "rayon"] } krilla = { git = "https://github.com/LaurenzV/krilla", rev = "37b9a00", default-features = false, features = ["raster-images", "comemo", "rayon", "pdf"] }
krilla-svg = { git = "https://github.com/LaurenzV/krilla", rev = "20c14fe" } krilla-svg = { git = "https://github.com/LaurenzV/krilla", rev = "37b9a00"}
kurbo = "0.11" kurbo = "0.11"
libfuzzer-sys = "0.4" libfuzzer-sys = "0.4"
lipsum = "0.9" lipsum = "0.9"
@ -103,6 +106,7 @@ roxmltree = "0.20"
rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] } rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] }
rustybuzz = "0.20" rustybuzz = "0.20"
same-file = "1" same-file = "1"
scraper = "0.23.1"
self-replace = "1.3.7" self-replace = "1.3.7"
semver = "1" semver = "1"
serde = { version = "1.0.184", features = ["derive"] } serde = { version = "1.0.184", features = ["derive"] }

View File

@ -27,10 +27,12 @@ codex = { workspace = true }
comemo = { workspace = true } comemo = { workspace = true }
csv = { workspace = true } csv = { workspace = true }
ecow = { workspace = true } ecow = { workspace = true }
ego-tree = { workspace = true }
flate2 = { workspace = true } flate2 = { workspace = true }
fontdb = { workspace = true } fontdb = { workspace = true }
glidesort = { workspace = true } glidesort = { workspace = true }
hayagriva = { workspace = true } hayagriva = { workspace = true }
hayro-syntax = { workspace = true }
icu_properties = { workspace = true } icu_properties = { workspace = true }
icu_provider = { workspace = true } icu_provider = { workspace = true }
icu_provider_blob = { workspace = true } icu_provider_blob = { workspace = true }
@ -50,6 +52,7 @@ regex-syntax = { workspace = true }
roxmltree = { workspace = true } roxmltree = { workspace = true }
rust_decimal = { workspace = true } rust_decimal = { workspace = true }
rustybuzz = { workspace = true } rustybuzz = { workspace = true }
scraper = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
serde_yaml = { workspace = true } serde_yaml = { workspace = true }

View File

@ -0,0 +1,98 @@
use ecow::eco_format;
use ego_tree::NodeRef;
use scraper::Node;
use typst_syntax::Spanned;
use crate::diag::{At, FileError, SourceDiagnostic, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, Array, Dict, IntoValue, Value};
use crate::loading::{DataSource, Load};
/// Reads structured data from an HTML file.
///
/// The HTML file is parsed into an array of dictionaries and strings. It is compatible with
/// the XML format, parsed by the [`xml`]($xml) function.
#[func(title = "HTML")]
pub fn html_decode(
engine: &mut Engine,
/// A [path]($syntax/#paths) to an HTML file or raw HTML bytes.
source: Spanned<DataSource>,
) -> SourceResult<Value> {
let data = source.load(engine.world)?;
let text = data.as_str().map_err(FileError::from).at(source.span)?;
let document = scraper::Html::parse_document(text);
if !document.errors.is_empty() {
let errors = document.errors.iter();
return Err(errors
.map(|msg| {
SourceDiagnostic::error(
source.span,
eco_format!("failed to parse HTML ({msg})"),
)
})
.collect());
}
Ok(convert_html(document.tree.root()))
}
/// Convert an HTML node to a Typst value.
fn convert_html(node_ref: NodeRef<Node>) -> Value {
// `prefix` and `name` are part of the tag name. For example,
// in the following HTML, `html5` is the prefix and `div` is the name:
// ```
// <html5:div class="example" />
// ```
let (prefix, name, attrs) = match node_ref.value() {
Node::Text(text) => return (*text).into_value(),
Node::Document => return Value::Array(convert_html_children(node_ref)),
// todo: the namespace is ignored
Node::Element(element) => {
(element.name.prefix.as_ref(), &*element.name.local, Some(element.attrs()))
}
Node::Fragment => (None, "fragment", None),
// todo: doc type and processing instruction are ignored
// https://en.wikipedia.org/wiki/Processing_Instruction
Node::Doctype(..) | Node::ProcessingInstruction(..) => return Value::None,
Node::Comment(comment) => {
return Value::Dict(dict! {
"tag" => "",
"attrs" => dict! {},
"children" => [(*comment).into_value()].into_iter().collect::<Array>(),
});
}
};
let children = convert_html_children(node_ref);
let attrs: Dict = attrs
.into_iter()
.flatten()
.map(|(name, value)| (name.into(), value.into_value()))
.collect();
let mut converted = dict! {
"tag" => name.into_value(),
"attrs" => attrs,
"children" => children,
};
// In most cases, the prefix is not set, so we only add it if it exists.
if let Some(prefix) = prefix {
converted.insert("prefix".into(), (*prefix).into_value());
}
Value::Dict(converted)
}
/// Convert children an HTML node to a Typst value.
fn convert_html_children(node_ref: NodeRef<Node>) -> Array {
node_ref
.children()
.filter(|v| {
!matches!(v.value(), Node::Doctype(..) | Node::ProcessingInstruction(..))
})
.map(convert_html)
.collect()
}

View File

@ -4,6 +4,8 @@
mod cbor_; mod cbor_;
#[path = "csv.rs"] #[path = "csv.rs"]
mod csv_; mod csv_;
#[path = "html.rs"]
mod html_;
#[path = "json.rs"] #[path = "json.rs"]
mod json_; mod json_;
#[path = "read.rs"] #[path = "read.rs"]
@ -21,6 +23,7 @@ use typst_syntax::{FileId, Spanned};
pub use self::cbor_::*; pub use self::cbor_::*;
pub use self::csv_::*; pub use self::csv_::*;
pub use self::html_::*;
pub use self::json_::*; pub use self::json_::*;
pub use self::read_::*; pub use self::read_::*;
pub use self::toml_::*; pub use self::toml_::*;
@ -37,6 +40,7 @@ pub(super) fn define(global: &mut Scope) {
global.start_category(crate::Category::DataLoading); global.start_category(crate::Category::DataLoading);
global.define_func::<read>(); global.define_func::<read>();
global.define_func::<csv>(); global.define_func::<csv>();
global.define_func::<html_decode>();
global.define_func::<json>(); global.define_func::<json>();
global.define_func::<toml>(); global.define_func::<toml>();
global.define_func::<yaml>(); global.define_func::<yaml>();

View File

@ -1,8 +1,10 @@
//! Image handling. //! Image handling.
mod pdf;
mod raster; mod raster;
mod svg; mod svg;
pub use self::pdf::PdfImage;
pub use self::raster::{ pub use self::raster::{
ExchangeFormat, PixelEncoding, PixelFormat, RasterFormat, RasterImage, ExchangeFormat, PixelEncoding, PixelFormat, RasterFormat, RasterImage,
}; };
@ -10,13 +12,15 @@ pub use self::svg::SvgImage;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::fmt::{self, Debug, Formatter}; use std::fmt::{self, Debug, Formatter};
use std::num::NonZeroUsize;
use std::sync::Arc; use std::sync::Arc;
use ecow::EcoString; use ecow::EcoString;
use hayro_syntax::LoadPdfError;
use typst_syntax::{Span, Spanned}; use typst_syntax::{Span, Spanned};
use typst_utils::LazyHash; use typst_utils::{LazyHash, NonZeroExt};
use crate::diag::{At, LoadedWithin, SourceResult, StrResult, warning}; use crate::diag::{At, LoadedWithin, SourceResult, StrResult, bail, warning};
use crate::engine::Engine; use crate::engine::Engine;
use crate::foundations::{ use crate::foundations::{
Bytes, Cast, Content, Derived, NativeElement, Packed, Smart, StyleChain, cast, elem, Bytes, Cast, Content, Derived, NativeElement, Packed, Smart, StyleChain, cast, elem,
@ -26,6 +30,7 @@ use crate::layout::{Length, Rel, Sizing};
use crate::loading::{DataSource, Load, LoadSource, Loaded, Readable}; use crate::loading::{DataSource, Load, LoadSource, Loaded, Readable};
use crate::model::Figurable; use crate::model::Figurable;
use crate::text::{LocalName, families}; use crate::text::{LocalName, families};
use crate::visualize::image::pdf::PdfDocument;
/// A raster or vector graphic. /// A raster or vector graphic.
/// ///
@ -79,8 +84,7 @@ pub struct ImageElem {
/// format automatically, but that's not always possible). /// format automatically, but that's not always possible).
/// ///
/// Supported formats are `{"png"}`, `{"jpg"}`, `{"gif"}`, `{"svg"}`, /// Supported formats are `{"png"}`, `{"jpg"}`, `{"gif"}`, `{"svg"}`,
/// `{"webp"}` as well as raw pixel data. Embedding PDFs as images is /// `{"pdf"}`, `{"webp"}` as well as raw pixel data.
/// [not currently supported](https://github.com/typst/typst/issues/145).
/// ///
/// When providing raw pixel data as the `source`, you must specify a /// When providing raw pixel data as the `source`, you must specify a
/// dictionary with the following keys as the `format`: /// dictionary with the following keys as the `format`:
@ -126,6 +130,11 @@ pub struct ImageElem {
/// A text describing the image. /// A text describing the image.
pub alt: Option<EcoString>, pub alt: Option<EcoString>,
/// The page number that should be embedded as an image. This attribute only
/// has an effect for PDF files.
#[default(NonZeroUsize::ONE)]
pub page: NonZeroUsize,
/// How the image should adjust itself to a given area (the area is defined /// How the image should adjust itself to a given area (the area is defined
/// by the `width` and `height` fields). Note that `fit` doesn't visually /// by the `width` and `height` fields). Note that `fit` doesn't visually
/// change anything if the area's aspect ratio is the same as the image's /// change anything if the area's aspect ratio is the same as the image's
@ -261,6 +270,45 @@ impl Packed<ImageElem> {
) )
.within(loaded)?, .within(loaded)?,
), ),
ImageFormat::Vector(VectorFormat::Pdf) => {
let document = match PdfDocument::new(loaded.data.clone()) {
Ok(doc) => doc,
Err(e) => match e {
LoadPdfError::Encryption => {
bail!(
span,
"the PDF is encrypted or password-protected";
hint: "such PDFs are currently not supported";
hint: "preprocess the PDF to remove the encryption"
);
}
LoadPdfError::Invalid => {
bail!(
span,
"the PDF could not be loaded";
hint: "perhaps the PDF file is malformed"
);
}
},
};
// The user provides the page number start from 1, but further
// down the pipeline, page numbers are 0-based.
let page_num = self.page.get(styles).get();
let page_idx = page_num - 1;
let num_pages = document.num_pages();
let Some(pdf_image) = PdfImage::new(document, page_idx) else {
let s = if num_pages == 1 { "" } else { "s" };
bail!(
span,
"page {page_num} does not exist";
hint: "the document only has {num_pages} page{s}"
);
};
ImageKind::Pdf(pdf_image)
}
}; };
Ok(Image::new(kind, self.alt.get_cloned(styles), self.scaling.get(styles))) Ok(Image::new(kind, self.alt.get_cloned(styles), self.scaling.get(styles)))
@ -286,6 +334,7 @@ impl Packed<ImageElem> {
"jpg" | "jpeg" => return Ok(ExchangeFormat::Jpg.into()), "jpg" | "jpeg" => return Ok(ExchangeFormat::Jpg.into()),
"gif" => return Ok(ExchangeFormat::Gif.into()), "gif" => return Ok(ExchangeFormat::Gif.into()),
"svg" | "svgz" => return Ok(VectorFormat::Svg.into()), "svg" | "svgz" => return Ok(VectorFormat::Svg.into()),
"pdf" => return Ok(VectorFormat::Pdf.into()),
"webp" => return Ok(ExchangeFormat::Webp.into()), "webp" => return Ok(ExchangeFormat::Webp.into()),
_ => {} _ => {}
} }
@ -373,6 +422,7 @@ impl Image {
match &self.0.kind { match &self.0.kind {
ImageKind::Raster(raster) => raster.format().into(), ImageKind::Raster(raster) => raster.format().into(),
ImageKind::Svg(_) => VectorFormat::Svg.into(), ImageKind::Svg(_) => VectorFormat::Svg.into(),
ImageKind::Pdf(_) => VectorFormat::Pdf.into(),
} }
} }
@ -381,6 +431,7 @@ impl Image {
match &self.0.kind { match &self.0.kind {
ImageKind::Raster(raster) => raster.width() as f64, ImageKind::Raster(raster) => raster.width() as f64,
ImageKind::Svg(svg) => svg.width(), ImageKind::Svg(svg) => svg.width(),
ImageKind::Pdf(pdf) => pdf.width() as f64,
} }
} }
@ -389,6 +440,7 @@ impl Image {
match &self.0.kind { match &self.0.kind {
ImageKind::Raster(raster) => raster.height() as f64, ImageKind::Raster(raster) => raster.height() as f64,
ImageKind::Svg(svg) => svg.height(), ImageKind::Svg(svg) => svg.height(),
ImageKind::Pdf(pdf) => pdf.height() as f64,
} }
} }
@ -397,6 +449,7 @@ impl Image {
match &self.0.kind { match &self.0.kind {
ImageKind::Raster(raster) => raster.dpi(), ImageKind::Raster(raster) => raster.dpi(),
ImageKind::Svg(_) => Some(Image::USVG_DEFAULT_DPI), ImageKind::Svg(_) => Some(Image::USVG_DEFAULT_DPI),
ImageKind::Pdf(_) => Some(Image::DEFAULT_DPI),
} }
} }
@ -435,6 +488,8 @@ pub enum ImageKind {
Raster(RasterImage), Raster(RasterImage),
/// An SVG image. /// An SVG image.
Svg(SvgImage), Svg(SvgImage),
/// A PDF image.
Pdf(PdfImage),
} }
impl From<RasterImage> for ImageKind { impl From<RasterImage> for ImageKind {
@ -469,10 +524,20 @@ impl ImageFormat {
return Some(Self::Vector(VectorFormat::Svg)); return Some(Self::Vector(VectorFormat::Svg));
} }
if is_pdf(data) {
return Some(Self::Vector(VectorFormat::Pdf));
}
None None
} }
} }
/// Checks whether the data looks like a PDF file.
fn is_pdf(data: &[u8]) -> bool {
let head = &data[..data.len().min(2048)];
memchr::memmem::find(head, b"%PDF-").is_some()
}
/// Checks whether the data looks like an SVG or a compressed SVG. /// Checks whether the data looks like an SVG or a compressed SVG.
fn is_svg(data: &[u8]) -> bool { fn is_svg(data: &[u8]) -> bool {
// Check for the gzip magic bytes. This check is perhaps a bit too // Check for the gzip magic bytes. This check is perhaps a bit too
@ -493,6 +558,9 @@ fn is_svg(data: &[u8]) -> bool {
pub enum VectorFormat { pub enum VectorFormat {
/// The vector graphics format of the web. /// The vector graphics format of the web.
Svg, Svg,
/// High-fidelity document and graphics format, with focus on exact
/// reproduction in print.
Pdf,
} }
impl<R> From<R> for ImageFormat impl<R> From<R> for ImageFormat

View File

@ -0,0 +1,98 @@
use std::hash::{Hash, Hasher};
use std::sync::Arc;
use hayro_syntax::page::Page;
use hayro_syntax::{LoadPdfError, Pdf};
use crate::foundations::Bytes;
/// A PDF document.
#[derive(Clone, Hash)]
pub struct PdfDocument(Arc<DocumentRepr>);
/// The internal representation of a `PdfDocument`.
struct DocumentRepr {
pdf: Arc<Pdf>,
data: Bytes,
}
impl PdfDocument {
/// Loads a PDF document.
#[comemo::memoize]
#[typst_macros::time(name = "load pdf document")]
pub fn new(data: Bytes) -> Result<PdfDocument, LoadPdfError> {
let pdf = Arc::new(Pdf::new(Arc::new(data.clone()))?);
Ok(Self(Arc::new(DocumentRepr { data, pdf })))
}
/// Returns the underlying PDF document.
pub fn pdf(&self) -> &Arc<Pdf> {
&self.0.pdf
}
/// Return the number of pages in the PDF.
pub fn num_pages(&self) -> usize {
self.0.pdf.pages().len()
}
}
impl Hash for DocumentRepr {
fn hash<H: Hasher>(&self, state: &mut H) {
self.data.hash(state);
}
}
/// A specific page of a PDF acting as an image.
#[derive(Clone, Hash)]
pub struct PdfImage(Arc<ImageRepr>);
/// The internal representation of a `PdfImage`.
struct ImageRepr {
document: PdfDocument,
page_index: usize,
width: f32,
height: f32,
}
impl PdfImage {
/// Creates a new PDF image.
///
/// Returns `None` if the page index is not valid.
#[comemo::memoize]
pub fn new(document: PdfDocument, page_index: usize) -> Option<PdfImage> {
let (width, height) = document.0.pdf.pages().get(page_index)?.render_dimensions();
Some(Self(Arc::new(ImageRepr { document, page_index, width, height })))
}
/// Returns the underlying Typst PDF document.
pub fn document(&self) -> &PdfDocument {
&self.0.document
}
/// Returns the PDF page of the image.
pub fn page(&self) -> &Page {
&self.document().pdf().pages()[self.0.page_index]
}
/// Returns the width of the image.
pub fn width(&self) -> f32 {
self.0.width
}
/// Returns the height of the image.
pub fn height(&self) -> f32 {
self.0.height
}
/// Returns the page index of the image.
pub fn page_index(&self) -> usize {
self.0.page_index
}
}
impl Hash for ImageRepr {
fn hash<H: Hasher>(&self, state: &mut H) {
self.document.hash(state);
self.page_index.hash(state);
}
}

View File

@ -9,6 +9,7 @@ use krilla::embed::EmbedError;
use krilla::error::KrillaError; use krilla::error::KrillaError;
use krilla::geom::PathBuilder; use krilla::geom::PathBuilder;
use krilla::page::{PageLabel, PageSettings}; use krilla::page::{PageLabel, PageSettings};
use krilla::pdf::PdfError;
use krilla::surface::Surface; use krilla::surface::Surface;
use krilla::{Document, SerializeSettings}; use krilla::{Document, SerializeSettings};
use krilla_svg::render_svg_glyph; use krilla_svg::render_svg_glyph;
@ -363,6 +364,42 @@ fn finish(
hint: "convert the image to 8 bit instead" hint: "convert the image to 8 bit instead"
) )
} }
KrillaError::Pdf(_, e, loc) => {
let span = to_span(loc);
match e {
// We already validated in `typst-library` that the page index is valid.
PdfError::InvalidPage(_) => bail!(
span,
"invalid page number for PDF file";
hint: "please report this as a bug"
),
PdfError::VersionMismatch(v) => {
let pdf_ver = v.as_str();
let config_ver = configuration.version();
let cur_ver = config_ver.as_str();
bail!(span,
"the version of the PDF is too high";
hint: "the current export target is {cur_ver}, while the PDF has version {pdf_ver}";
hint: "raise the export target to {pdf_ver} or higher";
hint: "or preprocess the PDF to convert it to a lower version"
);
}
}
}
KrillaError::DuplicateTagId(_, loc) => {
let span = to_span(loc);
bail!(span,
"duplicate tag id";
hint: "please report this as a bug"
);
}
KrillaError::UnknownTagId(_, loc) => {
let span = to_span(loc);
bail!(span,
"unknown tag id";
hint: "please report this as a bug"
);
}
}, },
} }
} }
@ -535,12 +572,12 @@ fn convert_error(
} }
// The below errors cannot occur yet, only once Typst supports full PDF/A // The below errors cannot occur yet, only once Typst supports full PDF/A
// and PDF/UA. But let's still add a message just to be on the safe side. // and PDF/UA. But let's still add a message just to be on the safe side.
ValidationError::MissingAnnotationAltText => error!( ValidationError::MissingAnnotationAltText(_) => error!(
Span::detached(), Span::detached(),
"{prefix} missing annotation alt text"; "{prefix} missing annotation alt text";
hint: "please report this as a bug" hint: "please report this as a bug"
), ),
ValidationError::MissingAltText => error!( ValidationError::MissingAltText(_) => error!(
Span::detached(), Span::detached(),
"{prefix} missing alt text"; "{prefix} missing alt text";
hint: "make sure your images and equations have alt text" hint: "make sure your images and equations have alt text"
@ -576,6 +613,13 @@ fn convert_error(
"{prefix} missing document date"; "{prefix} missing document date";
hint: "set the date of the document" hint: "set the date of the document"
), ),
ValidationError::EmbeddedPDF(loc) => {
error!(
to_span(*loc),
"embedding PDFs is currently not supported in this export mode";
hint: "try converting the PDF to an SVG before embedding it"
)
}
} }
} }

View File

@ -3,13 +3,14 @@ use std::sync::{Arc, OnceLock};
use image::{DynamicImage, EncodableLayout, GenericImageView, Rgba}; use image::{DynamicImage, EncodableLayout, GenericImageView, Rgba};
use krilla::image::{BitsPerComponent, CustomImage, ImageColorspace}; use krilla::image::{BitsPerComponent, CustomImage, ImageColorspace};
use krilla::pdf::PdfDocument;
use krilla::surface::Surface; use krilla::surface::Surface;
use krilla_svg::{SurfaceExt, SvgSettings}; use krilla_svg::{SurfaceExt, SvgSettings};
use typst_library::diag::{SourceResult, bail}; use typst_library::diag::{SourceResult, bail};
use typst_library::foundations::Smart; use typst_library::foundations::Smart;
use typst_library::layout::{Abs, Angle, Ratio, Size, Transform}; use typst_library::layout::{Abs, Angle, Ratio, Size, Transform};
use typst_library::visualize::{ use typst_library::visualize::{
ExchangeFormat, Image, ImageKind, ImageScaling, RasterFormat, RasterImage, ExchangeFormat, Image, ImageKind, ImageScaling, PdfImage, RasterFormat, RasterImage,
}; };
use typst_syntax::Span; use typst_syntax::Span;
@ -60,6 +61,9 @@ pub(crate) fn handle_image(
SvgSettings { embed_text: true, ..Default::default() }, SvgSettings { embed_text: true, ..Default::default() },
); );
} }
ImageKind::Pdf(pdf) => {
surface.draw_pdf_page(&convert_pdf(pdf), size.to_krilla(), pdf.page_index())
}
} }
if image.alt().is_some() { if image.alt().is_some() {
@ -85,9 +89,9 @@ struct Repr {
/// A wrapper around `RasterImage` so that we can implement `CustomImage`. /// A wrapper around `RasterImage` so that we can implement `CustomImage`.
#[derive(Clone)] #[derive(Clone)]
struct PdfImage(Arc<Repr>); struct PdfRasterImage(Arc<Repr>);
impl PdfImage { impl PdfRasterImage {
pub fn new(raster: RasterImage) -> Self { pub fn new(raster: RasterImage) -> Self {
Self(Arc::new(Repr { Self(Arc::new(Repr {
raster, raster,
@ -97,7 +101,7 @@ impl PdfImage {
} }
} }
impl Hash for PdfImage { impl Hash for PdfRasterImage {
fn hash<H: Hasher>(&self, state: &mut H) { fn hash<H: Hasher>(&self, state: &mut H) {
// `alpha_channel` and `actual_dynamic` are generated from the underlying `RasterImage`, // `alpha_channel` and `actual_dynamic` are generated from the underlying `RasterImage`,
// so this is enough. Since `raster` is prehashed, this is also very cheap. // so this is enough. Since `raster` is prehashed, this is also very cheap.
@ -105,7 +109,7 @@ impl Hash for PdfImage {
} }
} }
impl CustomImage for PdfImage { impl CustomImage for PdfRasterImage {
fn color_channel(&self) -> &[u8] { fn color_channel(&self) -> &[u8] {
self.0 self.0
.actual_dynamic .actual_dynamic
@ -196,10 +200,15 @@ fn convert_raster(
interpolate, interpolate,
) )
} else { } else {
krilla::image::Image::from_custom(PdfImage::new(raster), interpolate) krilla::image::Image::from_custom(PdfRasterImage::new(raster), interpolate)
} }
} }
#[comemo::memoize]
fn convert_pdf(pdf: &PdfImage) -> PdfDocument {
PdfDocument::new(pdf.document().pdf().clone())
}
fn exif_transform(image: &RasterImage, size: Size) -> (Transform, Size) { fn exif_transform(image: &RasterImage, size: Size) -> (Transform, Size) {
let base = |hp: bool, vp: bool, mut base_ts: Transform, size: Size| { let base = |hp: bool, vp: bool, mut base_ts: Transform, size: Size| {
if hp { if hp {

View File

@ -49,7 +49,6 @@ pub(crate) fn handle_link(
fc.push_annotation( fc.push_annotation(
LinkAnnotation::new( LinkAnnotation::new(
rect, rect,
None,
Target::Action(Action::Link(LinkAction::new(u.to_string()))), Target::Action(Action::Link(LinkAction::new(u.to_string()))),
) )
.into(), .into(),
@ -64,7 +63,6 @@ pub(crate) fn handle_link(
fc.push_annotation( fc.push_annotation(
LinkAnnotation::new( LinkAnnotation::new(
rect, rect,
None,
Target::Destination(krilla::destination::Destination::Named( Target::Destination(krilla::destination::Destination::Named(
nd.clone(), nd.clone(),
)), )),
@ -83,7 +81,6 @@ pub(crate) fn handle_link(
fc.push_annotation( fc.push_annotation(
LinkAnnotation::new( LinkAnnotation::new(
rect, rect,
None,
Target::Destination(krilla::destination::Destination::Xyz( Target::Destination(krilla::destination::Destination::Xyz(
XyzDestination::new(index, pos.point.to_krilla()), XyzDestination::new(index, pos.point.to_krilla()),
)), )),

View File

@ -13,11 +13,13 @@ keywords = { workspace = true }
readme = { workspace = true } readme = { workspace = true }
[dependencies] [dependencies]
typst-assets = { workspace = true }
typst-library = { workspace = true } typst-library = { workspace = true }
typst-macros = { workspace = true } typst-macros = { workspace = true }
typst-timing = { workspace = true } typst-timing = { workspace = true }
bytemuck = { workspace = true } bytemuck = { workspace = true }
comemo = { workspace = true } comemo = { workspace = true }
hayro = { workspace = true }
image = { workspace = true } image = { workspace = true }
pixglyph = { workspace = true } pixglyph = { workspace = true }
resvg = { workspace = true } resvg = { workspace = true }

View File

@ -1,11 +1,12 @@
use std::sync::Arc; use hayro::{FontData, FontQuery, InterpreterSettings, RenderSettings, StandardFont};
use image::imageops::FilterType; use image::imageops::FilterType;
use image::{GenericImageView, Rgba}; use image::{GenericImageView, Rgba};
use std::sync::Arc;
use tiny_skia as sk; use tiny_skia as sk;
use tiny_skia::IntSize;
use typst_library::foundations::Smart; use typst_library::foundations::Smart;
use typst_library::layout::Size; use typst_library::layout::Size;
use typst_library::visualize::{Image, ImageKind, ImageScaling}; use typst_library::visualize::{Image, ImageKind, ImageScaling, PdfImage};
use crate::{AbsExt, State}; use crate::{AbsExt, State};
@ -59,9 +60,9 @@ pub fn render_image(
/// Prepare a texture for an image at a scaled size. /// Prepare a texture for an image at a scaled size.
#[comemo::memoize] #[comemo::memoize]
fn build_texture(image: &Image, w: u32, h: u32) -> Option<Arc<sk::Pixmap>> { fn build_texture(image: &Image, w: u32, h: u32) -> Option<Arc<sk::Pixmap>> {
let mut texture = sk::Pixmap::new(w, h)?; let texture = match image.kind() {
match image.kind() {
ImageKind::Raster(raster) => { ImageKind::Raster(raster) => {
let mut texture = sk::Pixmap::new(w, h)?;
let w = texture.width(); let w = texture.width();
let h = texture.height(); let h = texture.height();
@ -85,15 +86,63 @@ fn build_texture(image: &Image, w: u32, h: u32) -> Option<Arc<sk::Pixmap>> {
let Rgba([r, g, b, a]) = src; let Rgba([r, g, b, a]) = src;
*dest = sk::ColorU8::from_rgba(r, g, b, a).premultiply(); *dest = sk::ColorU8::from_rgba(r, g, b, a).premultiply();
} }
texture
} }
ImageKind::Svg(svg) => { ImageKind::Svg(svg) => {
let mut texture = sk::Pixmap::new(w, h)?;
let tree = svg.tree(); let tree = svg.tree();
let ts = tiny_skia::Transform::from_scale( let ts = tiny_skia::Transform::from_scale(
w as f32 / tree.size().width(), w as f32 / tree.size().width(),
h as f32 / tree.size().height(), h as f32 / tree.size().height(),
); );
resvg::render(tree, ts, &mut texture.as_mut()); resvg::render(tree, ts, &mut texture.as_mut());
texture
} }
} ImageKind::Pdf(pdf) => build_pdf_texture(pdf, w, h)?,
};
Some(Arc::new(texture)) Some(Arc::new(texture))
} }
// Keep this in sync with `typst-svg`!
fn build_pdf_texture(pdf: &PdfImage, w: u32, h: u32) -> Option<sk::Pixmap> {
let select_standard_font = move |font: StandardFont| -> Option<(FontData, u32)> {
let bytes = match font {
StandardFont::Helvetica => typst_assets::pdf::SANS,
StandardFont::HelveticaBold => typst_assets::pdf::SANS_BOLD,
StandardFont::HelveticaOblique => typst_assets::pdf::SANS_ITALIC,
StandardFont::HelveticaBoldOblique => typst_assets::pdf::SANS_BOLD_ITALIC,
StandardFont::Courier => typst_assets::pdf::FIXED,
StandardFont::CourierBold => typst_assets::pdf::FIXED_BOLD,
StandardFont::CourierOblique => typst_assets::pdf::FIXED_ITALIC,
StandardFont::CourierBoldOblique => typst_assets::pdf::FIXED_BOLD_ITALIC,
StandardFont::TimesRoman => typst_assets::pdf::SERIF,
StandardFont::TimesBold => typst_assets::pdf::SERIF_BOLD,
StandardFont::TimesItalic => typst_assets::pdf::SERIF_ITALIC,
StandardFont::TimesBoldItalic => typst_assets::pdf::SERIF_BOLD_ITALIC,
StandardFont::ZapfDingBats => typst_assets::pdf::DING_BATS,
StandardFont::Symbol => typst_assets::pdf::SYMBOL,
};
Some((Arc::new(bytes), 0))
};
let interpreter_settings = InterpreterSettings {
font_resolver: Arc::new(move |query| match query {
FontQuery::Standard(s) => select_standard_font(*s),
FontQuery::Fallback(f) => select_standard_font(f.pick_standard_font()),
}),
warning_sink: Arc::new(|_| {}),
};
let render_settings = RenderSettings {
x_scale: w as f32 / pdf.width(),
y_scale: h as f32 / pdf.height(),
width: Some(w as u16),
height: Some(h as u16),
};
let hayro_pix = hayro::render(pdf.page(), &interpreter_settings, &render_settings);
sk::Pixmap::from_vec(hayro_pix.take_u8(), IntSize::from_wh(w, h)?)
}

View File

@ -13,6 +13,7 @@ keywords = { workspace = true }
readme = { workspace = true } readme = { workspace = true }
[dependencies] [dependencies]
typst-assets = { workspace = true }
typst-library = { workspace = true } typst-library = { workspace = true }
typst-macros = { workspace = true } typst-macros = { workspace = true }
typst-timing = { workspace = true } typst-timing = { workspace = true }
@ -21,6 +22,7 @@ base64 = { workspace = true }
comemo = { workspace = true } comemo = { workspace = true }
ecow = { workspace = true } ecow = { workspace = true }
flate2 = { workspace = true } flate2 = { workspace = true }
hayro = { workspace = true }
image = { workspace = true } image = { workspace = true }
ttf-parser = { workspace = true } ttf-parser = { workspace = true }
xmlparser = { workspace = true } xmlparser = { workspace = true }

View File

@ -1,10 +1,13 @@
use std::sync::Arc;
use base64::Engine; use base64::Engine;
use ecow::{EcoString, eco_format}; use ecow::{EcoString, eco_format};
use hayro::{FontData, FontQuery, InterpreterSettings, RenderSettings, StandardFont};
use image::{ImageEncoder, codecs::png::PngEncoder}; use image::{ImageEncoder, codecs::png::PngEncoder};
use typst_library::foundations::Smart; use typst_library::foundations::Smart;
use typst_library::layout::{Abs, Axes}; use typst_library::layout::{Abs, Axes};
use typst_library::visualize::{ use typst_library::visualize::{
ExchangeFormat, Image, ImageKind, ImageScaling, RasterFormat, ExchangeFormat, Image, ImageKind, ImageScaling, PdfImage, RasterFormat,
}; };
use crate::SVGRenderer; use crate::SVGRenderer;
@ -66,6 +69,25 @@ pub fn convert_image_to_base64_url(image: &Image) -> EcoString {
}), }),
}, },
ImageKind::Svg(svg) => ("svg+xml", svg.data()), ImageKind::Svg(svg) => ("svg+xml", svg.data()),
ImageKind::Pdf(pdf) => {
// To make sure the image isn't pixelated, we always scale up so the
// lowest dimension has at least 1000 pixels. However, we only scale
// up as much so that the largest dimension doesn't exceed 3000
// pixels.
const MIN_RES: f32 = 1000.0;
const MAX_RES: f32 = 3000.0;
let base_width = pdf.width();
let w_scale = (MIN_RES / base_width).max(MAX_RES / base_width);
let base_height = pdf.height();
let h_scale = (MIN_RES / base_height).min(MAX_RES / base_height);
let total_scale = w_scale.min(h_scale);
let width = (base_width * total_scale).ceil() as u32;
let height = (base_height * total_scale).ceil() as u32;
buf = pdf_to_png(pdf, width, height);
("png", buf.as_slice())
}
}; };
let mut url = eco_format!("data:image/{format};base64,"); let mut url = eco_format!("data:image/{format};base64,");
@ -73,3 +95,45 @@ pub fn convert_image_to_base64_url(image: &Image) -> EcoString {
url.push_str(&data); url.push_str(&data);
url url
} }
// Keep this in sync with `typst-png`!
fn pdf_to_png(pdf: &PdfImage, w: u32, h: u32) -> Vec<u8> {
let select_standard_font = move |font: StandardFont| -> Option<(FontData, u32)> {
let bytes = match font {
StandardFont::Helvetica => typst_assets::pdf::SANS,
StandardFont::HelveticaBold => typst_assets::pdf::SANS_BOLD,
StandardFont::HelveticaOblique => typst_assets::pdf::SANS_ITALIC,
StandardFont::HelveticaBoldOblique => typst_assets::pdf::SANS_BOLD_ITALIC,
StandardFont::Courier => typst_assets::pdf::FIXED,
StandardFont::CourierBold => typst_assets::pdf::FIXED_BOLD,
StandardFont::CourierOblique => typst_assets::pdf::FIXED_ITALIC,
StandardFont::CourierBoldOblique => typst_assets::pdf::FIXED_BOLD_ITALIC,
StandardFont::TimesRoman => typst_assets::pdf::SERIF,
StandardFont::TimesBold => typst_assets::pdf::SERIF_BOLD,
StandardFont::TimesItalic => typst_assets::pdf::SERIF_ITALIC,
StandardFont::TimesBoldItalic => typst_assets::pdf::SERIF_BOLD_ITALIC,
StandardFont::ZapfDingBats => typst_assets::pdf::DING_BATS,
StandardFont::Symbol => typst_assets::pdf::SYMBOL,
};
Some((Arc::new(bytes), 0))
};
let interpreter_settings = InterpreterSettings {
font_resolver: Arc::new(move |query| match query {
FontQuery::Standard(s) => select_standard_font(*s),
FontQuery::Fallback(f) => select_standard_font(f.pick_standard_font()),
}),
warning_sink: Arc::new(|_| {}),
};
let render_settings = RenderSettings {
x_scale: w as f32 / pdf.width(),
y_scale: h as f32 / pdf.height(),
width: Some(w as u16),
height: Some(h as u16),
};
let hayro_pix = hayro::render(pdf.page(), &interpreter_settings, &render_settings);
hayro_pix.take_png()
}

View File

@ -665,12 +665,3 @@ applicable, contains possible workarounds.
[`page` function]($page) which will force a page break. If you just want a few [`page` function]($page) which will force a page break. If you just want a few
paragraphs to stretch into the margins, then reverting to the old margins, you paragraphs to stretch into the margins, then reverting to the old margins, you
can use the [`pad` function]($pad) with negative padding. can use the [`pad` function]($pad) with negative padding.
- **Include PDFs as images.** In LaTeX, it has become customary to insert vector
graphics as PDF or EPS files. Typst supports neither format as an image
format, but you can easily convert both into SVG files with [online
tools](https://cloudconvert.com/pdf-to-svg) or
[Inkscape](https://inkscape.org/). The web app will automatically convert PDF
files to SVG files upon uploading them. You can also use the
community-provided [`muchpdf` package](https://typst.app/universe/package/muchpdf)
to embed PDFs. It internally converts PDFs to SVGs on-the-fly.

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 KiB

BIN
tests/ref/image-pdf.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.9 KiB

View File

@ -0,0 +1,46 @@
--- html ---
// Test reading XML data.
#let data = html-decode("/assets/text/example.html")
#test(data, ((
tag: "html",
attrs: (:),
children: (
(
tag: "head",
attrs: (:),
children: (
"\n ",
(
tag: "meta",
attrs: (charset: "UTF-8"),
children: (),
),
"\n ",
(
tag: "title",
attrs: (:),
children: ("Example document",),
),
"\n ",
),
),
"\n ",
(
tag: "body",
attrs: (:),
children: (
"\n ",
(
tag: "h1",
attrs: (:),
children: ("Hello, world!",),
),
"\n \n\n",
),
),
),
),))
--- html-invalid ---
// Error: 14-38 failed to parse HTML (Unexpected token)
#html-decode("/assets/text/hello.txt")

View File

@ -258,7 +258,7 @@ A #box(image("/assets/images/tiger.jpg", height: 1cm, width: 80%)) B
--- image-png-but-pixmap-format --- --- image-png-but-pixmap-format ---
#image( #image(
read("/assets/images/tiger.jpg", encoding: none), read("/assets/images/tiger.jpg", encoding: none),
// Error: 11-18 expected "png", "jpg", "gif", "webp", dictionary, "svg", or auto // Error: 11-18 expected "png", "jpg", "gif", "webp", dictionary, "svg", "pdf", or auto
format: "rgba8", format: "rgba8",
) )
@ -289,3 +289,16 @@ A #box(image("/assets/images/tiger.jpg", height: 1cm, width: 80%)) B
..rotations.map(v => raw(str(v), lang: "typc")), ..rotations.map(v => raw(str(v), lang: "typc")),
..rotations.map(rotated) ..rotations.map(rotated)
) )
--- image-pdf ---
#image("/assets/images/matplotlib.pdf")
--- image-pdf-multiple-pages ---
#image("/assets/images/diagrams.pdf", page: 1)
#image("/assets/images/diagrams.pdf", page: 3)
#image("/assets/images/diagrams.pdf", page: 2)
--- image-pdf-invalid-page ---
// Error: 2-49 page 2 does not exist
// Hint: 2-49 the document only has 1 page
#image("/assets/images/matplotlib.pdf", page: 2)