mirror of
https://github.com/typst/typst
synced 2025-07-27 22:37:54 +08:00
Merge eaf63ca80cdd13b6ef801262ab1b47c82dc0fd4a into b1c79b50d4253e7acb839a93450311c1fca12ac8
This commit is contained in:
commit
8e44039182
217
Cargo.lock
generated
217
Cargo.lock
generated
@ -545,6 +545,29 @@ version = "0.2.3"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
|
checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cssparser"
|
||||||
|
version = "0.34.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b7c66d1cd8ed61bf80b38432613a7a2f09401ab8d0501110655f8b341484a3e3"
|
||||||
|
dependencies = [
|
||||||
|
"cssparser-macros",
|
||||||
|
"dtoa-short",
|
||||||
|
"itoa",
|
||||||
|
"phf",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "cssparser-macros"
|
||||||
|
version = "0.6.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
|
||||||
|
dependencies = [
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "csv"
|
name = "csv"
|
||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
@ -592,6 +615,17 @@ dependencies = [
|
|||||||
"syn",
|
"syn",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "derive_more"
|
||||||
|
version = "0.99.20"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dirs"
|
name = "dirs"
|
||||||
version = "6.0.0"
|
version = "6.0.0"
|
||||||
@ -630,6 +664,21 @@ version = "1.2.1"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
|
checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtoa"
|
||||||
|
version = "1.0.10"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dtoa-short"
|
||||||
|
version = "0.3.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87"
|
||||||
|
dependencies = [
|
||||||
|
"dtoa",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ecow"
|
name = "ecow"
|
||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
@ -639,6 +688,12 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ego-tree"
|
||||||
|
version = "0.10.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "either"
|
name = "either"
|
||||||
version = "1.13.0"
|
version = "1.13.0"
|
||||||
@ -861,6 +916,16 @@ version = "2.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
|
checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "futf"
|
||||||
|
version = "0.1.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843"
|
||||||
|
dependencies = [
|
||||||
|
"mac",
|
||||||
|
"new_debug_unreachable",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "fxhash"
|
name = "fxhash"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
@ -970,6 +1035,18 @@ version = "0.5.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html5ever"
|
||||||
|
version = "0.29.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3b7410cae13cbc75623c98ac4cbfd1f0bedddf3227afc24f370cf0f50a44a11c"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"mac",
|
||||||
|
"markup5ever",
|
||||||
|
"match_token",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "httpdate"
|
name = "httpdate"
|
||||||
version = "1.0.3"
|
version = "1.0.3"
|
||||||
@ -1540,6 +1617,37 @@ dependencies = [
|
|||||||
"pkg-config",
|
"pkg-config",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "mac"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markup5ever"
|
||||||
|
version = "0.14.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c7a7213d12e1864c0f002f52c2923d4556935a43dec5e71355c2760e0f6e7a18"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"string_cache",
|
||||||
|
"string_cache_codegen",
|
||||||
|
"tendril",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "match_token"
|
||||||
|
version = "0.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "88a9689d8d44bf9964484516275f5cd4c9b59457a6940c1d5d0ecbb94510a36b"
|
||||||
|
dependencies = [
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
"syn",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.7.4"
|
version = "2.7.4"
|
||||||
@ -1612,6 +1720,12 @@ dependencies = [
|
|||||||
"tempfile",
|
"tempfile",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "new_debug_unreachable"
|
||||||
|
version = "1.0.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nom"
|
name = "nom"
|
||||||
version = "7.1.3"
|
version = "7.1.3"
|
||||||
@ -1869,6 +1983,16 @@ dependencies = [
|
|||||||
"phf_shared",
|
"phf_shared",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "phf_codegen"
|
||||||
|
version = "0.11.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "phf_generator"
|
name = "phf_generator"
|
||||||
version = "0.11.3"
|
version = "0.11.3"
|
||||||
@ -1981,6 +2105,12 @@ dependencies = [
|
|||||||
"zerocopy",
|
"zerocopy",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "precomputed-hash"
|
||||||
|
version = "0.1.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "proc-macro2"
|
name = "proc-macro2"
|
||||||
version = "1.0.93"
|
version = "1.0.93"
|
||||||
@ -2282,6 +2412,21 @@ version = "1.2.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "scraper"
|
||||||
|
version = "0.23.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "527e65d9d888567588db4c12da1087598d0f6f8b346cc2c5abc91f05fc2dffe2"
|
||||||
|
dependencies = [
|
||||||
|
"cssparser",
|
||||||
|
"ego-tree",
|
||||||
|
"getopts",
|
||||||
|
"html5ever",
|
||||||
|
"precomputed-hash",
|
||||||
|
"selectors",
|
||||||
|
"tendril",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "security-framework"
|
name = "security-framework"
|
||||||
version = "2.11.1"
|
version = "2.11.1"
|
||||||
@ -2305,6 +2450,25 @@ dependencies = [
|
|||||||
"libc",
|
"libc",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "selectors"
|
||||||
|
version = "0.26.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
|
||||||
|
dependencies = [
|
||||||
|
"bitflags 2.8.0",
|
||||||
|
"cssparser",
|
||||||
|
"derive_more",
|
||||||
|
"fxhash",
|
||||||
|
"log",
|
||||||
|
"new_debug_unreachable",
|
||||||
|
"phf",
|
||||||
|
"phf_codegen",
|
||||||
|
"precomputed-hash",
|
||||||
|
"servo_arc",
|
||||||
|
"smallvec",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "self-replace"
|
name = "self-replace"
|
||||||
version = "1.5.0"
|
version = "1.5.0"
|
||||||
@ -2388,6 +2552,15 @@ dependencies = [
|
|||||||
"unsafe-libyaml",
|
"unsafe-libyaml",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "servo_arc"
|
||||||
|
version = "0.4.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "ae65c4249478a2647db249fb43e23cec56a2c8974a427e7bd8cb5a1d0964921a"
|
||||||
|
dependencies = [
|
||||||
|
"stable_deref_trait",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "shell-escape"
|
name = "shell-escape"
|
||||||
version = "0.1.5"
|
version = "0.1.5"
|
||||||
@ -2499,6 +2672,31 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache"
|
||||||
|
version = "0.8.9"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
|
||||||
|
dependencies = [
|
||||||
|
"new_debug_unreachable",
|
||||||
|
"parking_lot",
|
||||||
|
"phf_shared",
|
||||||
|
"precomputed-hash",
|
||||||
|
"serde",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "string_cache_codegen"
|
||||||
|
version = "0.5.4"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
|
||||||
|
dependencies = [
|
||||||
|
"phf_generator",
|
||||||
|
"phf_shared",
|
||||||
|
"proc-macro2",
|
||||||
|
"quote",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "strsim"
|
name = "strsim"
|
||||||
version = "0.11.1"
|
version = "0.11.1"
|
||||||
@ -2621,6 +2819,17 @@ dependencies = [
|
|||||||
"windows-sys 0.59.0",
|
"windows-sys 0.59.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "tendril"
|
||||||
|
version = "0.4.3"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0"
|
||||||
|
dependencies = [
|
||||||
|
"futf",
|
||||||
|
"mac",
|
||||||
|
"utf-8",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "termcolor"
|
name = "termcolor"
|
||||||
version = "1.4.1"
|
version = "1.4.1"
|
||||||
@ -3071,6 +3280,7 @@ dependencies = [
|
|||||||
"comemo",
|
"comemo",
|
||||||
"csv",
|
"csv",
|
||||||
"ecow",
|
"ecow",
|
||||||
|
"ego-tree",
|
||||||
"flate2",
|
"flate2",
|
||||||
"fontdb",
|
"fontdb",
|
||||||
"glidesort",
|
"glidesort",
|
||||||
@ -3094,6 +3304,7 @@ dependencies = [
|
|||||||
"roxmltree",
|
"roxmltree",
|
||||||
"rust_decimal",
|
"rust_decimal",
|
||||||
"rustybuzz",
|
"rustybuzz",
|
||||||
|
"scraper",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"serde_yaml 0.9.34+deprecated",
|
"serde_yaml 0.9.34+deprecated",
|
||||||
@ -3426,6 +3637,12 @@ dependencies = [
|
|||||||
"xmlwriter",
|
"xmlwriter",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "utf-8"
|
||||||
|
version = "0.7.6"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "utf16_iter"
|
name = "utf16_iter"
|
||||||
version = "1.0.5"
|
version = "1.0.5"
|
||||||
|
@ -54,6 +54,7 @@ csv = "1"
|
|||||||
ctrlc = "3.4.1"
|
ctrlc = "3.4.1"
|
||||||
dirs = "6"
|
dirs = "6"
|
||||||
ecow = { version = "0.2", features = ["serde"] }
|
ecow = { version = "0.2", features = ["serde"] }
|
||||||
|
ego-tree = "0.10"
|
||||||
env_proxy = "0.4"
|
env_proxy = "0.4"
|
||||||
fastrand = "2.3"
|
fastrand = "2.3"
|
||||||
flate2 = "1"
|
flate2 = "1"
|
||||||
@ -104,6 +105,7 @@ roxmltree = "0.20"
|
|||||||
rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] }
|
rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] }
|
||||||
rustybuzz = "0.20"
|
rustybuzz = "0.20"
|
||||||
same-file = "1"
|
same-file = "1"
|
||||||
|
scraper = "0.23.1"
|
||||||
self-replace = "1.3.7"
|
self-replace = "1.3.7"
|
||||||
semver = "1"
|
semver = "1"
|
||||||
serde = { version = "1.0.184", features = ["derive"] }
|
serde = { version = "1.0.184", features = ["derive"] }
|
||||||
|
@ -27,6 +27,7 @@ codex = { workspace = true }
|
|||||||
comemo = { workspace = true }
|
comemo = { workspace = true }
|
||||||
csv = { workspace = true }
|
csv = { workspace = true }
|
||||||
ecow = { workspace = true }
|
ecow = { workspace = true }
|
||||||
|
ego-tree = { workspace = true }
|
||||||
flate2 = { workspace = true }
|
flate2 = { workspace = true }
|
||||||
fontdb = { workspace = true }
|
fontdb = { workspace = true }
|
||||||
glidesort = { workspace = true }
|
glidesort = { workspace = true }
|
||||||
@ -50,6 +51,7 @@ regex-syntax = { workspace = true }
|
|||||||
roxmltree = { workspace = true }
|
roxmltree = { workspace = true }
|
||||||
rust_decimal = { workspace = true }
|
rust_decimal = { workspace = true }
|
||||||
rustybuzz = { workspace = true }
|
rustybuzz = { workspace = true }
|
||||||
|
scraper = { workspace = true }
|
||||||
serde = { workspace = true }
|
serde = { workspace = true }
|
||||||
serde_json = { workspace = true }
|
serde_json = { workspace = true }
|
||||||
serde_yaml = { workspace = true }
|
serde_yaml = { workspace = true }
|
||||||
|
98
crates/typst-library/src/loading/html.rs
Normal file
98
crates/typst-library/src/loading/html.rs
Normal file
@ -0,0 +1,98 @@
|
|||||||
|
use ecow::eco_format;
|
||||||
|
use ego_tree::NodeRef;
|
||||||
|
use scraper::Node;
|
||||||
|
use typst_syntax::Spanned;
|
||||||
|
|
||||||
|
use crate::diag::{At, FileError, SourceDiagnostic, SourceResult};
|
||||||
|
use crate::engine::Engine;
|
||||||
|
use crate::foundations::{dict, func, Array, Dict, IntoValue, Value};
|
||||||
|
use crate::loading::{DataSource, Load};
|
||||||
|
|
||||||
|
/// Reads structured data from an HTML file.
|
||||||
|
///
|
||||||
|
/// The HTML file is parsed into an array of dictionaries and strings. It is compatible with
|
||||||
|
/// the XML format, parsed by the [`xml`]($xml) function.
|
||||||
|
#[func(title = "HTML")]
|
||||||
|
pub fn html_decode(
|
||||||
|
engine: &mut Engine,
|
||||||
|
/// A [path]($syntax/#paths) to an HTML file or raw HTML bytes.
|
||||||
|
source: Spanned<DataSource>,
|
||||||
|
) -> SourceResult<Value> {
|
||||||
|
let data = source.load(engine.world)?;
|
||||||
|
let text = data.as_str().map_err(FileError::from).at(source.span)?;
|
||||||
|
let document = scraper::Html::parse_document(text);
|
||||||
|
|
||||||
|
if !document.errors.is_empty() {
|
||||||
|
let errors = document.errors.iter();
|
||||||
|
return Err(errors
|
||||||
|
.map(|msg| {
|
||||||
|
SourceDiagnostic::error(
|
||||||
|
source.span,
|
||||||
|
eco_format!("failed to parse HTML ({msg})"),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect());
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(convert_html(document.tree.root()))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert an HTML node to a Typst value.
|
||||||
|
fn convert_html(node_ref: NodeRef<Node>) -> Value {
|
||||||
|
// `prefix` and `name` are part of the tag name. For example,
|
||||||
|
// in the following HTML, `html5` is the prefix and `div` is the name:
|
||||||
|
// ```
|
||||||
|
// <html5:div class="example" />
|
||||||
|
// ```
|
||||||
|
let (prefix, name, attrs) = match node_ref.value() {
|
||||||
|
Node::Text(text) => return (*text).into_value(),
|
||||||
|
Node::Document => return Value::Array(convert_html_children(node_ref)),
|
||||||
|
// todo: the namespace is ignored
|
||||||
|
Node::Element(element) => {
|
||||||
|
(element.name.prefix.as_ref(), &*element.name.local, Some(element.attrs()))
|
||||||
|
}
|
||||||
|
Node::Fragment => (None, "fragment", None),
|
||||||
|
// todo: doc type and processing instruction are ignored
|
||||||
|
// https://en.wikipedia.org/wiki/Processing_Instruction
|
||||||
|
Node::Doctype(..) | Node::ProcessingInstruction(..) => return Value::None,
|
||||||
|
Node::Comment(comment) => {
|
||||||
|
return Value::Dict(dict! {
|
||||||
|
"tag" => "",
|
||||||
|
"attrs" => dict! {},
|
||||||
|
"children" => [(*comment).into_value()].into_iter().collect::<Array>(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let children = convert_html_children(node_ref);
|
||||||
|
|
||||||
|
let attrs: Dict = attrs
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.map(|(name, value)| (name.into(), value.into_value()))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let mut converted = dict! {
|
||||||
|
"tag" => name.into_value(),
|
||||||
|
"attrs" => attrs,
|
||||||
|
"children" => children,
|
||||||
|
};
|
||||||
|
|
||||||
|
// In most cases, the prefix is not set, so we only add it if it exists.
|
||||||
|
if let Some(prefix) = prefix {
|
||||||
|
converted.insert("prefix".into(), (*prefix).into_value());
|
||||||
|
}
|
||||||
|
|
||||||
|
Value::Dict(converted)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert children an HTML node to a Typst value.
|
||||||
|
fn convert_html_children(node_ref: NodeRef<Node>) -> Array {
|
||||||
|
node_ref
|
||||||
|
.children()
|
||||||
|
.filter(|v| {
|
||||||
|
!matches!(v.value(), Node::Doctype(..) | Node::ProcessingInstruction(..))
|
||||||
|
})
|
||||||
|
.map(convert_html)
|
||||||
|
.collect()
|
||||||
|
}
|
@ -4,6 +4,8 @@
|
|||||||
mod cbor_;
|
mod cbor_;
|
||||||
#[path = "csv.rs"]
|
#[path = "csv.rs"]
|
||||||
mod csv_;
|
mod csv_;
|
||||||
|
#[path = "html.rs"]
|
||||||
|
mod html_;
|
||||||
#[path = "json.rs"]
|
#[path = "json.rs"]
|
||||||
mod json_;
|
mod json_;
|
||||||
#[path = "read.rs"]
|
#[path = "read.rs"]
|
||||||
@ -21,6 +23,7 @@ use typst_syntax::{FileId, Spanned};
|
|||||||
|
|
||||||
pub use self::cbor_::*;
|
pub use self::cbor_::*;
|
||||||
pub use self::csv_::*;
|
pub use self::csv_::*;
|
||||||
|
pub use self::html_::*;
|
||||||
pub use self::json_::*;
|
pub use self::json_::*;
|
||||||
pub use self::read_::*;
|
pub use self::read_::*;
|
||||||
pub use self::toml_::*;
|
pub use self::toml_::*;
|
||||||
@ -37,6 +40,7 @@ pub(super) fn define(global: &mut Scope) {
|
|||||||
global.start_category(crate::Category::DataLoading);
|
global.start_category(crate::Category::DataLoading);
|
||||||
global.define_func::<read>();
|
global.define_func::<read>();
|
||||||
global.define_func::<csv>();
|
global.define_func::<csv>();
|
||||||
|
global.define_func::<html_decode>();
|
||||||
global.define_func::<json>();
|
global.define_func::<json>();
|
||||||
global.define_func::<toml>();
|
global.define_func::<toml>();
|
||||||
global.define_func::<yaml>();
|
global.define_func::<yaml>();
|
||||||
|
46
tests/suite/loading/html.typ
Normal file
46
tests/suite/loading/html.typ
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
--- html ---
|
||||||
|
// Test reading HTML data.
|
||||||
|
#let data = html-decode("/assets/text/example.html")
|
||||||
|
#test(data, ((
|
||||||
|
tag: "html",
|
||||||
|
attrs: (:),
|
||||||
|
children: (
|
||||||
|
(
|
||||||
|
tag: "head",
|
||||||
|
attrs: (:),
|
||||||
|
children: (
|
||||||
|
"\n ",
|
||||||
|
(
|
||||||
|
tag: "meta",
|
||||||
|
attrs: (charset: "UTF-8"),
|
||||||
|
children: (),
|
||||||
|
),
|
||||||
|
"\n ",
|
||||||
|
(
|
||||||
|
tag: "title",
|
||||||
|
attrs: (:),
|
||||||
|
children: ("Example document",),
|
||||||
|
),
|
||||||
|
"\n ",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
"\n ",
|
||||||
|
(
|
||||||
|
tag: "body",
|
||||||
|
attrs: (:),
|
||||||
|
children: (
|
||||||
|
"\n ",
|
||||||
|
(
|
||||||
|
tag: "h1",
|
||||||
|
attrs: (:),
|
||||||
|
children: ("Hello, world!",),
|
||||||
|
),
|
||||||
|
"\n \n\n",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
),
|
||||||
|
),))
|
||||||
|
|
||||||
|
--- html-invalid ---
|
||||||
|
// Error: 14-38 failed to parse HTML (Unexpected token)
|
||||||
|
#html-decode("/assets/text/hello.txt")
|
Loading…
x
Reference in New Issue
Block a user