mirror of
https://github.com/typst/typst
synced 2025-05-15 01:25:28 +08:00
117 lines
3.3 KiB
Rust
117 lines
3.3 KiB
Rust
use ecow::EcoString;
|
|
use roxmltree::ParsingOptions;
|
|
use typst_syntax::Spanned;
|
|
|
|
use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
|
|
use crate::engine::Engine;
|
|
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
|
|
use crate::loading::{DataSource, Load, Readable};
|
|
|
|
/// Reads structured data from an XML file.
|
|
///
|
|
/// The XML file is parsed into an array of dictionaries and strings. XML nodes
|
|
/// can be elements or strings. Elements are represented as dictionaries with
|
|
/// the following keys:
|
|
///
|
|
/// - `tag`: The name of the element as a string.
|
|
/// - `attrs`: A dictionary of the element's attributes as strings.
|
|
/// - `children`: An array of the element's child nodes.
|
|
///
|
|
/// The XML file in the example contains a root `news` tag with multiple
|
|
/// `article` tags. Each article has a `title`, `author`, and `content` tag. The
|
|
/// `content` tag contains one or more paragraphs, which are represented as `p`
|
|
/// tags.
|
|
///
|
|
/// # Example
|
|
/// ```example
|
|
/// #let find-child(elem, tag) = {
|
|
/// elem.children
|
|
/// .find(e => "tag" in e and e.tag == tag)
|
|
/// }
|
|
///
|
|
/// #let article(elem) = {
|
|
/// let title = find-child(elem, "title")
|
|
/// let author = find-child(elem, "author")
|
|
/// let pars = find-child(elem, "content")
|
|
///
|
|
/// [= #title.children.first()]
|
|
/// text(10pt, weight: "medium")[
|
|
/// Published by
|
|
/// #author.children.first()
|
|
/// ]
|
|
///
|
|
/// for p in pars.children {
|
|
/// if type(p) == dictionary {
|
|
/// parbreak()
|
|
/// p.children.first()
|
|
/// }
|
|
/// }
|
|
/// }
|
|
///
|
|
/// #let data = xml("example.xml")
|
|
/// #for elem in data.first().children {
|
|
/// if type(elem) == dictionary {
|
|
/// article(elem)
|
|
/// }
|
|
/// }
|
|
/// ```
|
|
#[func(scope, title = "XML")]
|
|
pub fn xml(
|
|
engine: &mut Engine,
|
|
/// A [path]($syntax/#paths) to an XML file or raw XML bytes.
|
|
source: Spanned<DataSource>,
|
|
) -> SourceResult<Value> {
|
|
let data = source.load(engine.world)?;
|
|
let text = data.as_str().map_err(FileError::from).at(source.span)?;
|
|
let document = roxmltree::Document::parse_with_options(
|
|
text,
|
|
ParsingOptions { allow_dtd: true, ..Default::default() },
|
|
)
|
|
.map_err(format_xml_error)
|
|
.at(source.span)?;
|
|
Ok(convert_xml(document.root()))
|
|
}
|
|
|
|
#[scope]
|
|
impl xml {
|
|
/// Reads structured data from an XML string/bytes.
|
|
#[func(title = "Decode XML")]
|
|
#[deprecated = "`xml.decode` is deprecated, directly pass bytes to `xml` instead"]
|
|
pub fn decode(
|
|
engine: &mut Engine,
|
|
/// XML data.
|
|
data: Spanned<Readable>,
|
|
) -> SourceResult<Value> {
|
|
xml(engine, data.map(Readable::into_source))
|
|
}
|
|
}
|
|
|
|
/// Convert an XML node to a Typst value.
|
|
fn convert_xml(node: roxmltree::Node) -> Value {
|
|
if node.is_text() {
|
|
return node.text().unwrap_or_default().into_value();
|
|
}
|
|
|
|
let children: Array = node.children().map(convert_xml).collect();
|
|
if node.is_root() {
|
|
return Value::Array(children);
|
|
}
|
|
|
|
let tag: Str = node.tag_name().name().into();
|
|
let attrs: Dict = node
|
|
.attributes()
|
|
.map(|attr| (attr.name().into(), attr.value().into_value()))
|
|
.collect();
|
|
|
|
Value::Dict(dict! {
|
|
"tag" => tag,
|
|
"attrs" => attrs,
|
|
"children" => children,
|
|
})
|
|
}
|
|
|
|
/// Format the user-facing XML error message.
|
|
fn format_xml_error(error: roxmltree::Error) -> EcoString {
|
|
format_xml_like_error("XML", error)
|
|
}
|