2024-10-27 18:04:55 +00:00

124 lines
3.5 KiB
Rust

use ecow::EcoString;
use roxmltree::ParsingOptions;
use typst_syntax::Spanned;
use crate::diag::{format_xml_like_error, At, FileError, SourceResult};
use crate::engine::Engine;
use crate::foundations::{dict, func, scope, Array, Dict, IntoValue, Str, Value};
use crate::loading::Readable;
use crate::World;
/// Reads structured data from an XML file.
///
/// The XML file is parsed into an array of dictionaries and strings. XML nodes
/// can be elements or strings. Elements are represented as dictionaries with
/// the following keys:
///
/// - `tag`: The name of the element as a string.
/// - `attrs`: A dictionary of the element's attributes as strings.
/// - `children`: An array of the element's child nodes.
///
/// The XML file in the example contains a root `news` tag with multiple
/// `article` tags. Each article has a `title`, `author`, and `content` tag. The
/// `content` tag contains one or more paragraphs, which are represented as `p`
/// tags.
///
/// # Example
/// ```example
/// #let find-child(elem, tag) = {
/// elem.children
/// .find(e => "tag" in e and e.tag == tag)
/// }
///
/// #let article(elem) = {
/// let title = find-child(elem, "title")
/// let author = find-child(elem, "author")
/// let pars = find-child(elem, "content")
///
/// heading(title.children.first())
/// text(10pt, weight: "medium")[
/// Published by
/// #author.children.first()
/// ]
///
/// for p in pars.children {
/// if (type(p) == "dictionary") {
/// parbreak()
/// p.children.first()
/// }
/// }
/// }
///
/// #let data = xml("example.xml")
/// #for elem in data.first().children {
/// if (type(elem) == "dictionary") {
/// article(elem)
/// }
/// }
/// ```
#[func(scope, title = "XML")]
pub fn xml(
/// The engine.
engine: &mut Engine,
/// Path to an XML file.
///
/// For more details, see the [Paths section]($syntax/#paths).
path: Spanned<EcoString>,
) -> SourceResult<Value> {
let Spanned { v: path, span } = path;
let id = span.resolve_path(&path).at(span)?;
let data = engine.world.file(id).at(span)?;
xml::decode(Spanned::new(Readable::Bytes(data), span))
}
#[scope]
impl xml {
/// Reads structured data from an XML string/bytes.
#[func(title = "Decode XML")]
pub fn decode(
/// XML data.
data: Spanned<Readable>,
) -> SourceResult<Value> {
let Spanned { v: data, span } = data;
let text = std::str::from_utf8(data.as_slice())
.map_err(FileError::from)
.at(span)?;
let document = roxmltree::Document::parse_with_options(
text,
ParsingOptions { allow_dtd: true, ..Default::default() },
)
.map_err(format_xml_error)
.at(span)?;
Ok(convert_xml(document.root()))
}
}
/// Convert an XML node to a Typst value.
fn convert_xml(node: roxmltree::Node) -> Value {
if node.is_text() {
return node.text().unwrap_or_default().into_value();
}
let children: Array = node.children().map(convert_xml).collect();
if node.is_root() {
return Value::Array(children);
}
let tag: Str = node.tag_name().name().into();
let attrs: Dict = node
.attributes()
.map(|attr| (attr.name().into(), attr.value().into_value()))
.collect();
Value::Dict(dict! {
"tag" => tag,
"attrs" => attrs,
"children" => children,
})
}
/// Format the user-facing XML error message.
fn format_xml_error(error: roxmltree::Error) -> EcoString {
format_xml_like_error("XML", error)
}