diff --git a/crates/typst-library/src/foundations/str.rs b/crates/typst-library/src/foundations/str.rs index 72fdcc53a..5a939fdba 100644 --- a/crates/typst-library/src/foundations/str.rs +++ b/crates/typst-library/src/foundations/str.rs @@ -646,7 +646,7 @@ impl Repr for str { '\0' => r.push_str(r"\u{0}"), '\'' => r.push('\''), '"' => r.push_str(r#"\""#), - _ => c.escape_debug().for_each(|c| r.push(c)), + _ => r.extend(c.escape_debug()), } } r.push('"'); @@ -654,6 +654,12 @@ impl Repr for str { } } +impl Repr for char { + fn repr(&self) -> EcoString { + EcoString::from(*self).repr() + } +} + impl Add for Str { type Output = Self; diff --git a/crates/typst-library/src/html/dom.rs b/crates/typst-library/src/html/dom.rs new file mode 100644 index 000000000..ee94279f2 --- /dev/null +++ b/crates/typst-library/src/html/dom.rs @@ -0,0 +1,572 @@ +use std::fmt::{self, Debug, Display, Formatter}; + +use ecow::{EcoString, EcoVec}; +use typst_syntax::Span; +use typst_utils::{PicoStr, ResolvedPicoStr}; + +use crate::diag::{bail, HintedStrResult, StrResult}; +use crate::foundations::{cast, Dict, Repr, Str}; +use crate::introspection::{Introspector, Tag}; +use crate::layout::Frame; +use crate::model::DocumentInfo; + +/// An HTML document. +#[derive(Debug, Clone)] +pub struct HtmlDocument { + /// The document's root HTML element. + pub root: HtmlElement, + /// Details about the document. + pub info: DocumentInfo, + /// Provides the ability to execute queries on the document. + pub introspector: Introspector, +} + +/// A child of an HTML element. +#[derive(Debug, Clone, Hash)] +pub enum HtmlNode { + /// An introspectable element that produced something within this node. + Tag(Tag), + /// Plain text. + Text(EcoString, Span), + /// Another element. + Element(HtmlElement), + /// A frame that will be displayed as an embedded SVG. + Frame(Frame), +} + +impl HtmlNode { + /// Create a plain text node. 
+ pub fn text(text: impl Into, span: Span) -> Self { + Self::Text(text.into(), span) + } +} + +impl From for HtmlNode { + fn from(element: HtmlElement) -> Self { + Self::Element(element) + } +} + +/// An HTML element. +#[derive(Debug, Clone, Hash)] +pub struct HtmlElement { + /// The HTML tag. + pub tag: HtmlTag, + /// The element's attributes. + pub attrs: HtmlAttrs, + /// The element's children. + pub children: Vec, + /// The span from which the element originated, if any. + pub span: Span, +} + +impl HtmlElement { + /// Create a new, blank element without attributes or children. + pub fn new(tag: HtmlTag) -> Self { + Self { + tag, + attrs: HtmlAttrs::default(), + children: vec![], + span: Span::detached(), + } + } + + /// Attach children to the element. + /// + /// Note: This overwrites potential previous children. + pub fn with_children(mut self, children: Vec) -> Self { + self.children = children; + self + } + + /// Add an attribute to the element. + pub fn with_attr(mut self, key: HtmlAttr, value: impl Into) -> Self { + self.attrs.push(key, value); + self + } + + /// Attach a span to the element. + pub fn spanned(mut self, span: Span) -> Self { + self.span = span; + self + } +} + +/// The tag of an HTML element. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct HtmlTag(PicoStr); + +impl HtmlTag { + /// Intern an HTML tag string at runtime. + pub fn intern(string: &str) -> StrResult { + if string.is_empty() { + bail!("tag name must not be empty"); + } + + if let Some(c) = string.chars().find(|&c| !charsets::is_valid_in_tag_name(c)) { + bail!("the character {} is not valid in a tag name", c.repr()); + } + + Ok(Self(PicoStr::intern(string))) + } + + /// Creates a compile-time constant `HtmlTag`. + /// + /// Should only be used in const contexts because it can panic. 
+ #[track_caller] + pub const fn constant(string: &'static str) -> Self { + if string.is_empty() { + panic!("tag name must not be empty"); + } + + let bytes = string.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if !bytes[i].is_ascii_alphanumeric() { + panic!("constant tag name must be ASCII alphanumeric"); + } + i += 1; + } + + Self(PicoStr::constant(string)) + } + + /// Resolves the tag to a string. + pub fn resolve(self) -> ResolvedPicoStr { + self.0.resolve() + } + + /// Turns the tag into its inner interned string. + pub const fn into_inner(self) -> PicoStr { + self.0 + } +} + +impl Debug for HtmlTag { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(self, f) + } +} + +impl Display for HtmlTag { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "<{}>", self.resolve()) + } +} + +cast! { + HtmlTag, + self => self.0.resolve().as_str().into_value(), + v: Str => Self::intern(&v)?, +} + +/// Attributes of an HTML element. +#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)] +pub struct HtmlAttrs(pub EcoVec<(HtmlAttr, EcoString)>); + +impl HtmlAttrs { + /// Add an attribute. + pub fn push(&mut self, attr: HtmlAttr, value: impl Into) { + self.0.push((attr, value.into())); + } +} + +cast! { + HtmlAttrs, + self => self.0 + .into_iter() + .map(|(key, value)| (key.resolve().as_str().into(), value.into_value())) + .collect::() + .into_value(), + values: Dict => Self(values + .into_iter() + .map(|(k, v)| { + let attr = HtmlAttr::intern(&k)?; + let value = v.cast::()?; + Ok((attr, value)) + }) + .collect::>()?), +} + +/// An attribute of an HTML element. +#[derive(Copy, Clone, Eq, PartialEq, Hash)] +pub struct HtmlAttr(PicoStr); + +impl HtmlAttr { + /// Intern an HTML attribute string at runtime. 
+ pub fn intern(string: &str) -> StrResult { + if string.is_empty() { + bail!("attribute name must not be empty"); + } + + if let Some(c) = + string.chars().find(|&c| !charsets::is_valid_in_attribute_name(c)) + { + bail!("the character {} is not valid in an attribute name", c.repr()); + } + + Ok(Self(PicoStr::intern(string))) + } + + /// Creates a compile-time constant `HtmlAttr`. + /// + /// Should only be used in const contexts because it can panic. + #[track_caller] + pub const fn constant(string: &'static str) -> Self { + if string.is_empty() { + panic!("attribute name must not be empty"); + } + + let bytes = string.as_bytes(); + let mut i = 0; + while i < bytes.len() { + if !bytes[i].is_ascii_alphanumeric() { + panic!("constant attribute name must be ASCII alphanumeric"); + } + i += 1; + } + + Self(PicoStr::constant(string)) + } + + /// Resolves the attribute to a string. + pub fn resolve(self) -> ResolvedPicoStr { + self.0.resolve() + } + + /// Turns the attribute into its inner interned string. + pub const fn into_inner(self) -> PicoStr { + self.0 + } +} + +impl Debug for HtmlAttr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(self, f) + } +} + +impl Display for HtmlAttr { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.resolve()) + } +} + +cast! { + HtmlAttr, + self => self.0.resolve().as_str().into_value(), + v: Str => Self::intern(&v)?, +} + +/// Defines syntactical properties of HTML tags, attributes, and text. +pub mod charsets { + /// Check whether a character is valid in a tag name. + pub const fn is_valid_in_tag_name(c: char) -> bool { + c.is_ascii_alphanumeric() + } + + /// Check whether a character is valid in an attribute name. + pub const fn is_valid_in_attribute_name(c: char) -> bool { + match c { + // These are forbidden. 
+ '\0' | ' ' | '"' | '\'' | '>' | '/' | '=' => false, + c if is_whatwg_control_char(c) => false, + c if is_whatwg_non_char(c) => false, + // _Everything_ else is allowed, including U+2029 paragraph + // separator. Go wild. + _ => true, + } + } + + /// Check whether a character can be used in an attribute value without + /// escaping. + /// + /// See the attribute syntax section of the WHATWG HTML Standard. + pub const fn is_valid_in_attribute_value(c: char) -> bool { + match c { + // Ampersands are sometimes legal (i.e. when they are not _ambiguous + // ampersands_) but it is not worth the trouble to check for that. + '&' => false, + // Quotation marks are not allowed in double-quote-delimited attribute + // values. + '"' => false, + // All other text characters are allowed. + c => is_w3c_text_char(c), + } + } + + /// Check whether a character can be used in normal text without + /// escaping. + pub const fn is_valid_in_normal_element_text(c: char) -> bool { + match c { + // Ampersands are sometimes legal (i.e. when they are not _ambiguous + // ampersands_) but it is not worth the trouble to check for that. + '&' => false, + // Less-than signs are not allowed in text. + '<' => false, + // All other text characters are allowed. + c => is_w3c_text_char(c), + } + } + + /// Check if something is valid text in HTML. + pub const fn is_w3c_text_char(c: char) -> bool { + match c { + // Non-characters are obviously not text characters. + c if is_whatwg_non_char(c) => false, + // Control characters are disallowed, except for whitespace. + c if is_whatwg_control_char(c) => c.is_ascii_whitespace(), + // Everything else is allowed. + _ => true, + } + } + + const fn is_whatwg_non_char(c: char) -> bool { + match c { + '\u{fdd0}'..='\u{fdef}' => true, + // Non-characters matching xxFFFE or xxFFFF up to x10FFFF (inclusive). + c if c as u32 & 0xfffe == 0xfffe && c as u32 <= 0x10ffff => true, + _ => false, + } + } + + const fn is_whatwg_control_char(c: char) -> bool { + match c { + // C0 control characters. 
+ '\u{00}'..='\u{1f}' => true, + // Other control characters. + '\u{7f}'..='\u{9f}' => true, + _ => false, + } + } +} + +/// Predefined constants for HTML tags. +pub mod tag { + use super::HtmlTag; + + macro_rules! tags { + ($($tag:ident)*) => { + $(#[allow(non_upper_case_globals)] + pub const $tag: HtmlTag = HtmlTag::constant( + stringify!($tag) + );)* + } + } + + tags! { + a + abbr + address + area + article + aside + audio + b + base + bdi + bdo + blockquote + body + br + button + canvas + caption + cite + code + col + colgroup + data + datalist + dd + del + details + dfn + dialog + div + dl + dt + em + embed + fieldset + figcaption + figure + footer + form + h1 + h2 + h3 + h4 + h5 + h6 + head + header + hgroup + hr + html + i + iframe + img + input + ins + kbd + label + legend + li + link + main + map + mark + menu + meta + meter + nav + noscript + object + ol + optgroup + option + output + p + param + picture + pre + progress + q + rp + rt + ruby + s + samp + script + search + section + select + slot + small + source + span + strong + style + sub + summary + sup + table + tbody + td + template + textarea + tfoot + th + thead + time + title + tr + track + u + ul + var + video + wbr + } + + /// Whether the element is inline-level as opposed to being block-level. + /// + /// Not sure whether this distinction really makes sense. But we somehow + /// need to decide what to put into automatic paragraphs. A `<span>` + /// should be merged into a paragraph created by realization, but a `<div>`
+ /// shouldn't. + /// + /// + /// + /// + pub fn is_inline(tag: HtmlTag) -> bool { + matches!( + tag, + self::abbr + | self::a + | self::bdi + | self::b + | self::br + | self::bdo + | self::code + | self::cite + | self::dfn + | self::data + | self::i + | self::em + | self::mark + | self::kbd + | self::rp + | self::q + | self::ruby + | self::rt + | self::samp + | self::s + | self::span + | self::small + | self::sub + | self::strong + | self::time + | self::sup + | self::var + | self::u + ) + } + + /// Whether this is a void tag whose associated element may not have + /// children. + pub fn is_void(tag: HtmlTag) -> bool { + matches!( + tag, + self::area + | self::base + | self::br + | self::col + | self::embed + | self::hr + | self::img + | self::input + | self::link + | self::meta + | self::param + | self::source + | self::track + | self::wbr + ) + } + + /// Whether this is a tag containing raw text. + pub fn is_raw(tag: HtmlTag) -> bool { + matches!(tag, self::script | self::style) + } + + /// Whether this is a tag containing escapable raw text. + pub fn is_escapable_raw(tag: HtmlTag) -> bool { + matches!(tag, self::textarea | self::title) + } +} + +/// Predefined constants for HTML attributes. +/// +/// Note: These are very incomplete. +pub mod attr { + use super::HtmlAttr; + + macro_rules! attrs { + ($($attr:ident)*) => { + $(#[allow(non_upper_case_globals)] + pub const $attr: HtmlAttr = HtmlAttr::constant( + stringify!($attr) + );)* + } + } + + attrs! { + charset + content + href + name + value + } +} diff --git a/crates/typst-library/src/html/mod.rs b/crates/typst-library/src/html/mod.rs new file mode 100644 index 000000000..ea248172a --- /dev/null +++ b/crates/typst-library/src/html/mod.rs @@ -0,0 +1,59 @@ +//! HTML output. + +mod dom; + +pub use self::dom::*; + +use ecow::EcoString; + +use crate::foundations::{category, elem, Category, Content, Module, Scope}; + +/// HTML output. 
+#[category] +pub static HTML: Category; + +/// Create a module with all HTML definitions. +pub fn module() -> Module { + let mut html = Scope::deduplicating(); + html.category(HTML); + html.define_elem::(); + html.define_elem::(); + Module::new("html", html) +} + +/// A HTML element that can contain Typst content. +#[elem(name = "elem")] +pub struct HtmlElem { + /// The element's tag. + #[required] + pub tag: HtmlTag, + + /// The element's attributes. + #[borrowed] + pub attrs: HtmlAttrs, + + /// The contents of the HTML element. + #[positional] + #[borrowed] + pub body: Option, +} + +impl HtmlElem { + /// Add an attribute to the element. + pub fn with_attr(mut self, attr: HtmlAttr, value: impl Into) -> Self { + self.attrs.get_or_insert_with(Default::default).push(attr, value); + self + } +} + +/// An element that forces its contents to be laid out. +/// +/// Integrates content that requires layout (e.g. a plot) into HTML output +/// by turning it into an inline SVG. +#[elem] +pub struct FrameElem { + /// The contents that shall be laid out. 
+ #[positional] + #[required] + pub body: Content, +} diff --git a/crates/typst-library/src/lib.rs b/crates/typst-library/src/lib.rs index a2bb61f38..87b2fcb44 100644 --- a/crates/typst-library/src/lib.rs +++ b/crates/typst-library/src/lib.rs @@ -15,6 +15,7 @@ extern crate self as typst_library; pub mod diag; pub mod engine; pub mod foundations; +pub mod html; pub mod introspection; pub mod layout; pub mod loading; @@ -248,6 +249,10 @@ fn global(math: Module, inputs: Dict, features: &Features) -> Module { self::introspection::define(&mut global); self::loading::define(&mut global); self::symbols::define(&mut global); + global.reset_category(); + if features.is_enabled(Feature::Html) { + global.define_module(self::html::module()); + } prelude(&mut global); Module::new("global", global) } diff --git a/crates/typst-utils/src/pico.rs b/crates/typst-utils/src/pico.rs index dbab14a1c..2c80d37de 100644 --- a/crates/typst-utils/src/pico.rs +++ b/crates/typst-utils/src/pico.rs @@ -216,6 +216,8 @@ mod exceptions { pub const LIST: &[&str] = &[ "cjk-latin-spacing", "discretionary-ligatures", + "h5", + "h6", "historical-ligatures", "number-clearance", "number-margin",