//! Typst's realization subsystem. //! //! *Realization* is the process of recursively applying styling and, in //! particular, show rules to produce well-known elements that can be processed //! further. use std::borrow::Cow; use std::cell::LazyCell; use arrayvec::ArrayVec; use bumpalo::collections::{String as BumpString, Vec as BumpVec}; use comemo::Track; use ecow::EcoString; use typst_library::diag::{bail, At, SourceResult}; use typst_library::engine::Engine; use typst_library::foundations::{ Content, Context, ContextElem, Element, NativeElement, Recipe, RecipeIndex, Selector, SequenceElem, Show, ShowSet, Style, StyleChain, StyleVec, StyledElem, Styles, Synthesize, Transformation, }; use typst_library::html::{tag, HtmlElem}; use typst_library::introspection::{Locatable, SplitLocator, Tag, TagElem}; use typst_library::layout::{ AlignElem, BoxElem, HElem, InlineElem, PageElem, PagebreakElem, VElem, }; use typst_library::math::{EquationElem, Mathy}; use typst_library::model::{ CiteElem, CiteGroup, DocumentElem, EnumElem, ListElem, ListItemLike, ListLike, ParElem, ParbreakElem, TermsElem, }; use typst_library::routines::{Arenas, Pair, RealizationKind}; use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem}; use typst_syntax::Span; use typst_utils::{SliceExt, SmallBitSet}; /// Realize content into a flat list of well-known, styled items. #[typst_macros::time(name = "realize")] pub fn realize<'a>( kind: RealizationKind, engine: &mut Engine, locator: &mut SplitLocator, arenas: &'a Arenas, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult>> { let mut s = State { engine, locator, arenas, rules: match kind { RealizationKind::LayoutDocument(_) | RealizationKind::LayoutFragment => { LAYOUT_RULES } RealizationKind::HtmlDocument(_) => HTML_DOCUMENT_RULES, RealizationKind::HtmlFragment => HTML_FRAGMENT_RULES, RealizationKind::Math => MATH_RULES, }, sink: vec![], groupings: ArrayVec::new(), outside: matches!(kind, RealizationKind::LayoutDocument(_)), may_attach: false, kind, }; visit(&mut s, content, styles)?; finish(&mut s)?; Ok(s.sink) } /// Mutable state for realization. /// /// Sadly, we need that many lifetimes because &mut references are invariant and /// it would force the lifetimes of e.g. engine and locator to be equal if they /// shared a lifetime. We can get around it by enforcing the lifetimes on /// `fn realize`, but that makes it less flexible on the call site, which isn't /// worth it. /// /// The only interesting lifetime is 'a, which is that of the content that comes /// in and goes out. It's the same 'a as on `fn realize`. struct State<'a, 'x, 'y, 'z> { /// Defines what kind of realization we are performing. kind: RealizationKind<'x>, /// The engine. engine: &'x mut Engine<'y>, /// Assigns unique locations to elements. locator: &'x mut SplitLocator<'z>, /// Temporary storage arenas for lifetime extension during realization. arenas: &'a Arenas, /// The output elements of well-known types. sink: Vec>, /// Grouping rules used for realization. rules: &'x [&'x GroupingRule], /// Currently active groupings. groupings: ArrayVec, MAX_GROUP_NESTING>, /// Whether we are currently not within any container or show rule output. /// This is used to determine page styles during layout. outside: bool, /// Whether now following attach spacing can survive. may_attach: bool, } /// Defines a rule for how certain elements shall be grouped during realization. struct GroupingRule { /// When an element is visited that matches a rule with higher priority /// than one that is currently grouped, we start a nested group. priority: u8, /// Whether the grouping handles tags itself. If this is set to `false`, /// realization will transparently take care of tags and they will not /// be visible to `finish`. tags: bool, /// Defines which kinds of elements start and make up this kind of grouping. trigger: fn(&Content, &RealizationKind) -> bool, /// Defines elements that may appear in the interior of the grouping, but /// not at the edges. inner: fn(&Content) -> bool, /// Defines whether styles for this kind of element interrupt the grouping. interrupt: fn(Element) -> bool, /// Should convert the accumulated elements in `s.sink[start..]` into /// the grouped element. finish: fn(Grouped) -> SourceResult<()>, } /// A started grouping of some elements. struct Grouping<'a> { /// The position in `s.sink` where the group starts. start: usize, /// The rule used for this grouping. rule: &'a GroupingRule, } /// The result of grouping. struct Grouped<'a, 'x, 'y, 'z, 's> { /// The realization state. s: &'s mut State<'a, 'x, 'y, 'z>, /// The position in `s.sink` where the group starts. start: usize, } /// What to do with an element when encountering it during realization. struct Verdict<'a> { /// Whether the element is already prepared (i.e. things that should only /// happen once have happened). prepared: bool, /// A map of styles to apply to the element. map: Styles, /// An optional show rule transformation to apply to the element. step: Option>, } /// A show rule transformation to apply to the element. enum ShowStep<'a> { /// A user-defined transformational show rule. Recipe(&'a Recipe, RecipeIndex), /// The built-in show rule. Builtin, } /// A match of a regex show rule. struct RegexMatch<'a> { /// The offset in the string that matched. offset: usize, /// The text that matched. text: EcoString, /// The style chain of the matching grouping. styles: StyleChain<'a>, /// The index of the recipe that matched. id: RecipeIndex, /// The recipe that matched. recipe: &'a Recipe, } /// State kept for space collapsing. #[derive(Debug, Copy, Clone, Eq, PartialEq)] enum SpaceState { /// A following space will be collapsed. Destructive, /// A following space will be kept unless a destructive element follows. Supportive, /// A space exists at this index. Space(usize), } impl<'a> State<'a, '_, '_, '_> { /// Lifetime-extends some content. fn store(&self, content: Content) -> &'a Content { self.arenas.content.alloc(content) } /// Lifetime-extends some pairs. /// /// By using a `BumpVec` instead of a `alloc_slice_copy` we can reuse /// the space if no other bump allocations have been made by the time /// the `BumpVec` is dropped. fn store_slice(&self, pairs: &[Pair<'a>]) -> BumpVec<'a, Pair<'a>> { let mut vec = BumpVec::new_in(&self.arenas.bump); vec.extend_from_slice_copy(pairs); vec } } impl<'a, 'x, 'y, 'z, 's> Grouped<'a, 'x, 'y, 'z, 's> { /// Accesses the grouped elements. fn get(&self) -> &[Pair<'a>] { &self.s.sink[self.start..] } /// Accesses the grouped elements mutably. fn get_mut(&mut self) -> (&mut Vec>, usize) { (&mut self.s.sink, self.start) } /// Removes the grouped elements from the sink and retrieves back the state /// with which resulting elements can be visited. fn end(self) -> &'s mut State<'a, 'x, 'y, 'z> { self.s.sink.truncate(self.start); self.s } } /// Handles an arbitrary piece of content during realization. fn visit<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult<()> { // Tags can always simply be pushed. if content.is::() { s.sink.push((content, styles)); return Ok(()); } // Transformations for math content based on the realization kind. Needs // to happen before show rules. if visit_math_rules(s, content, styles)? { return Ok(()); } // Apply show rules and preparation. if visit_show_rules(s, content, styles)? { return Ok(()); } // Recurse into sequences. Styled elements and sequences can currently also // have labels, so this needs to happen before they are handled. if let Some(sequence) = content.to_packed::() { for elem in &sequence.children { visit(s, elem, styles)?; } return Ok(()); } // Recurse into styled elements. if let Some(styled) = content.to_packed::() { return visit_styled(s, &styled.child, Cow::Borrowed(&styled.styles), styles); } // Apply grouping --- where multiple elements are collected and then // processed together (typically being transformed into one). if visit_grouping_rules(s, content, styles)? { return Ok(()); } // Some elements are skipped based on specific circumstances. if visit_filter_rules(s, content, styles)? { return Ok(()); } // No further transformations to apply, so we can finally just push it to // the output! s.sink.push((content, styles)); Ok(()) } // Handles special cases for math in normal content and nested equations in // math. fn visit_math_rules<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult { if let RealizationKind::Math = s.kind { // Transparently recurse into equations nested in math, so that things // like this work: // ``` // #let my = $pi$ // $ my r^2 $ // ``` if let Some(elem) = content.to_packed::() { visit(s, &elem.body, styles)?; return Ok(true); } // In normal realization, we apply regex show rules to consecutive // textual elements via `TEXTUAL` grouping. However, in math, this is // not desirable, so we just do it on a per-element basis. if let Some(elem) = content.to_packed::() { if let Some(m) = find_regex_match_in_str(&elem.text, styles) { visit_regex_match(s, &[(content, styles)], m)?; return Ok(true); } } } else { // Transparently wrap mathy content into equations. if content.can::() && !content.is::() { let eq = EquationElem::new(content.clone()).pack().spanned(content.span()); visit(s, s.store(eq), styles)?; return Ok(true); } } Ok(false) } /// Tries to apply show rules to or prepare content. Returns `true` if the /// element was handled. fn visit_show_rules<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult { // Determines whether and how to proceed with show rule application. let Some(Verdict { prepared, mut map, step }) = verdict(s.engine, content, styles) else { return Ok(false); }; // Create a fresh copy that we can mutate. let mut output = Cow::Borrowed(content); // If the element isn't yet prepared (we're seeing it for the first time), // prepare it. let mut tags = None; if !prepared { tags = prepare(s.engine, s.locator, output.to_mut(), &mut map, styles)?; } // Apply a show rule step, if there is one. if let Some(step) = step { let chained = styles.chain(&map); let result = match step { // Apply a user-defined show rule. ShowStep::Recipe(recipe, guard) => { let context = Context::new(output.location(), Some(chained)); recipe.apply( s.engine, context.track(), output.into_owned().guarded(guard), ) } // Apply a built-in show rule. ShowStep::Builtin => { output.with::().unwrap().show(s.engine, chained) } }; // Errors in show rules don't terminate compilation immediately. We just // continue with empty content for them and show all errors together, if // they remain by the end of the introspection loop. // // This way, we can ignore errors that only occur in earlier iterations // and also show more useful errors at once. output = Cow::Owned(s.engine.delay(result)); } // Lifetime-extend the realized content if necessary. let realized = match output { Cow::Borrowed(realized) => realized, Cow::Owned(realized) => s.store(realized), }; // Push start tag. let (start, end) = tags.unzip(); if let Some(tag) = start { visit(s, s.store(TagElem::packed(tag)), styles)?; } let prev_outside = s.outside; s.outside &= content.is::(); s.engine.route.increase(); s.engine.route.check_show_depth().at(content.span())?; visit_styled(s, realized, Cow::Owned(map), styles)?; s.outside = prev_outside; s.engine.route.decrease(); // Push end tag. if let Some(tag) = end { visit(s, s.store(TagElem::packed(tag)), styles)?; } Ok(true) } /// Inspects a target element and the current styles and determines how to /// proceed with the styling. fn verdict<'a>( engine: &mut Engine, target: &'a Content, styles: StyleChain<'a>, ) -> Option> { let prepared = target.is_prepared(); let mut map = Styles::new(); let mut step = None; // Do pre-synthesis on a cloned element to be able to match on synthesized // fields before real synthesis runs (during preparation). It's really // unfortunate that we have to do this, but otherwise // `show figure.where(kind: table)` won't work :( let mut target = target; let mut slot; if !prepared && target.can::() { slot = target.clone(); slot.with_mut::() .unwrap() .synthesize(engine, styles) .ok(); target = &slot; } // Lazily computes the total number of recipes in the style chain. We need // it to determine whether a particular show rule was already applied to the // `target` previously. For this purpose, show rules are indexed from the // top of the chain as the chain might grow to the bottom. let depth = LazyCell::new(|| styles.recipes().count()); for (r, recipe) in styles.recipes().enumerate() { // We're not interested in recipes that don't match. if !recipe .selector() .is_some_and(|selector| selector.matches(target, Some(styles))) { continue; } // Special handling for show-set rules. if let Transformation::Style(transform) = recipe.transform() { if !prepared { map.apply(transform.clone()); } continue; } // If we already have a show step, don't look for one. if step.is_some() { continue; } // Check whether this show rule was already applied to the target. let index = RecipeIndex(*depth - r); if target.is_guarded(index) { continue; } // We'll apply this recipe. step = Some(ShowStep::Recipe(recipe, index)); // If we found a show rule and are already prepared, there is nothing // else to do, so we can just break. If we are not yet prepared, // continue searching for potential show-set styles. if prepared { break; } } // If we found no user-defined rule, also consider the built-in show rule. if step.is_none() && target.can::() { step = Some(ShowStep::Builtin); } // If there's no nothing to do, there is also no verdict. if step.is_none() && map.is_empty() && (prepared || { target.label().is_none() && target.location().is_none() && !target.can::() && !target.can::() && !target.can::() }) { return None; } Some(Verdict { prepared, map, step }) } /// This is only executed the first time an element is visited. fn prepare( engine: &mut Engine, locator: &mut SplitLocator, target: &mut Content, map: &mut Styles, styles: StyleChain, ) -> SourceResult> { // Generate a location for the element, which uniquely identifies it in // the document. This has some overhead, so we only do it for elements // that are explicitly marked as locatable and labelled elements. // // The element could already have a location even if it is not prepared // when it stems from a query. let key = typst_utils::hash128(&target); if target.location().is_none() && (target.can::() || target.label().is_some()) { let loc = locator.next_location(engine.introspector, key); target.set_location(loc); } // Apply built-in show-set rules. User-defined show-set rules are already // considered in the map built while determining the verdict. if let Some(show_settable) = target.with::() { map.apply(show_settable.show_set(styles)); } // If necessary, generated "synthesized" fields (which are derived from // other fields or queries). Do this after show-set so that show-set styles // are respected. if let Some(synthesizable) = target.with_mut::() { synthesizable.synthesize(engine, styles.chain(map))?; } // Copy style chain fields into the element itself, so that they are // available in rules. target.materialize(styles.chain(map)); // If the element is locatable, create start and end tags to be able to find // the element in the frames after layout. Do this after synthesis and // materialization, so that it includes the synthesized fields. Do it before // marking as prepared so that show-set rules will apply to this element // when queried. let tags = target .location() .map(|loc| (Tag::Start(target.clone()), Tag::End(loc, key))); // Ensure that this preparation only runs once by marking the element as // prepared. target.mark_prepared(); Ok(tags) } /// Handles a styled element. fn visit_styled<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, mut local: Cow<'a, Styles>, outer: StyleChain<'a>, ) -> SourceResult<()> { // Nothing to do if the styles are actually empty. if local.is_empty() { return visit(s, content, outer); } // Check for document and page styles. let mut pagebreak = false; for style in local.iter() { let Some(elem) = style.element() else { continue }; if elem == DocumentElem::elem() { match &mut s.kind { RealizationKind::LayoutDocument(info) | RealizationKind::HtmlDocument(info) => info.populate(&local), _ => bail!( style.span(), "document set rules are not allowed inside of containers" ), } } else if elem == PageElem::elem() { let RealizationKind::LayoutDocument(_) = s.kind else { let span = style.span(); bail!(span, "page configuration is not allowed inside of containers"); }; // When there are page styles, we "break free" from our show rule cage. pagebreak = true; s.outside = true; } } // If we are not within a container or show rule, mark the styles as // "outside". This will allow them to be lifted to the page level. if s.outside { local = Cow::Owned(local.into_owned().outside()); } // Lifetime-extend the styles if necessary. let outer = s.arenas.bump.alloc(outer); let local = match local { Cow::Borrowed(map) => map, Cow::Owned(owned) => &*s.arenas.styles.alloc(owned), }; // Generate a weak pagebreak if there is a page interruption. For the // starting pagebreak we only want the styles before and including the // interruptions, not trailing styles that happen to be in the same `Styles` // list, so we trim the local styles. if pagebreak { let relevant = local .as_slice() .trim_end_matches(|style| style.element() != Some(PageElem::elem())); visit(s, PagebreakElem::shared_weak(), outer.chain(relevant))?; } finish_interrupted(s, local)?; visit(s, content, outer.chain(local))?; finish_interrupted(s, local)?; // Generate a weak "boundary" pagebreak at the end. In comparison to a // normal weak pagebreak, the styles of this are ignored during layout, so // it doesn't really matter what we use here. if pagebreak { visit(s, PagebreakElem::shared_boundary(), *outer)?; } Ok(()) } /// Tries to group the content in an active group or start a new one if any /// grouping rule matches. Returns `true` if the element was grouped. fn visit_grouping_rules<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult { let matching = s.rules.iter().find(|&rule| (rule.trigger)(content, &s.kind)); // Try to continue or finish an existing grouping. while let Some(active) = s.groupings.last() { // Start a nested group if a rule with higher priority matches. if matching.is_some_and(|rule| rule.priority > active.rule.priority) { break; } // If the element can be added to the active grouping, do it. if (active.rule.trigger)(content, &s.kind) || (active.rule.inner)(content) { s.sink.push((content, styles)); return Ok(true); } finish_innermost_grouping(s)?; } // Start a new grouping. if let Some(rule) = matching { let start = s.sink.len(); s.groupings.push(Grouping { start, rule }); s.sink.push((content, styles)); return Ok(true); } Ok(false) } /// Some elements don't make it to the sink depending on the realization kind /// and current state. fn visit_filter_rules<'a>( s: &mut State<'a, '_, '_, '_>, content: &'a Content, styles: StyleChain<'a>, ) -> SourceResult { if content.is::() && !matches!(s.kind, RealizationKind::Math | RealizationKind::HtmlFragment) { // Outside of maths, spaces that were not collected by the paragraph // grouper don't interest us. return Ok(true); } else if content.is::() { // Paragraph breaks are only a boundary for paragraph grouping, we don't // need to store them. s.may_attach = false; return Ok(true); } else if !s.may_attach && content.to_packed::().is_some_and(|elem| elem.attach(styles)) { // Delete attach spacing collapses if not immediately following a // paragraph. return Ok(true); } // Remember whether following attach spacing can survive. s.may_attach = content.is::(); Ok(false) } /// Finishes all grouping. fn finish(s: &mut State) -> SourceResult<()> { finish_grouping_while(s, |s| !s.groupings.is_empty())?; // In math, spaces are top-level. if let RealizationKind::Math = s.kind { collapse_spaces(&mut s.sink, 0); } Ok(()) } /// Finishes groupings while any active group is interrupted by the styles. fn finish_interrupted(s: &mut State, local: &Styles) -> SourceResult<()> { let mut last = None; for elem in local.iter().filter_map(|style| style.element()) { if last == Some(elem) { continue; } finish_grouping_while(s, |s| { s.groupings.iter().any(|grouping| (grouping.rule.interrupt)(elem)) })?; last = Some(elem); } Ok(()) } /// Finishes groupings while `f` returns `true`. fn finish_grouping_while(s: &mut State, f: F) -> SourceResult<()> where F: Fn(&State) -> bool, { // Finishing of a group may result in new content and new grouping. This // can, in theory, go on for a bit. To prevent it from becoming an infinite // loop, we keep track of the iteration count. let mut i = 0; while f(s) { finish_innermost_grouping(s)?; i += 1; if i > 512 { bail!(Span::detached(), "maximum grouping depth exceeded"); } } Ok(()) } /// Finishes the currently innermost grouping. fn finish_innermost_grouping(s: &mut State) -> SourceResult<()> { // The grouping we are interrupting. let Grouping { start, rule } = s.groupings.pop().unwrap(); // Trim trailing non-trigger elements. let trimmed = s.sink[start..].trim_end_matches(|(c, _)| !(rule.trigger)(c, &s.kind)); let end = start + trimmed.len(); let tail = s.store_slice(&s.sink[end..]); s.sink.truncate(end); // If the grouping is not interested in tags, remove and collect them. let mut tags = BumpVec::::new_in(&s.arenas.bump); if !rule.tags { let mut k = start; for i in start..end { if s.sink[i].0.is::() { tags.push(s.sink[i]); continue; } if k < i { s.sink[k] = s.sink[i]; } k += 1; } s.sink.truncate(k); } // Execute the grouping's finisher rule. (rule.finish)(Grouped { s, start })?; // Visit the tags and staged elements again. for &(content, styles) in tags.iter().chain(&tail) { visit(s, content, styles)?; } Ok(()) } /// The maximum number of nested groups that are possible. Corresponds to the /// number of unique priority levels. const MAX_GROUP_NESTING: usize = 3; /// Grouping rules used in layout realization. static LAYOUT_RULES: &[&GroupingRule] = &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS]; /// Grouping rules used in HTML root realization. static HTML_DOCUMENT_RULES: &[&GroupingRule] = &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS]; /// Grouping rules used in HTML fragment realization. static HTML_FRAGMENT_RULES: &[&GroupingRule] = &[&TEXTUAL, &CITES, &LIST, &ENUM, &TERMS]; /// Grouping rules used in math realizatio. static MATH_RULES: &[&GroupingRule] = &[&CITES, &LIST, &ENUM, &TERMS]; /// Groups adjacent textual elements for text show rule application. static TEXTUAL: GroupingRule = GroupingRule { priority: 3, tags: true, trigger: |content, _| { let elem = content.elem(); elem == TextElem::elem() || elem == LinebreakElem::elem() || elem == SmartQuoteElem::elem() }, inner: |content| content.elem() == SpaceElem::elem(), // Any kind of style interrupts this kind of grouping since regex show // rules cannot match over style changes anyway. interrupt: |_| true, finish: finish_textual, }; /// Collects inline-level elements into a `ParElem`. static PAR: GroupingRule = GroupingRule { priority: 1, tags: true, trigger: |content, kind| { let elem = content.elem(); elem == TextElem::elem() || elem == HElem::elem() || elem == LinebreakElem::elem() || elem == SmartQuoteElem::elem() || elem == InlineElem::elem() || elem == BoxElem::elem() || (matches!( kind, RealizationKind::HtmlDocument(_) | RealizationKind::HtmlFragment ) && content .to_packed::() .is_some_and(|elem| tag::is_inline(elem.tag))) }, inner: |content| content.elem() == SpaceElem::elem(), interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(), finish: finish_par, }; /// Collects `CiteElem`s into `CiteGroup`s. static CITES: GroupingRule = GroupingRule { priority: 2, tags: false, trigger: |content, _| content.elem() == CiteElem::elem(), inner: |content| content.elem() == SpaceElem::elem(), interrupt: |elem| { elem == CiteGroup::elem() || elem == ParElem::elem() || elem == AlignElem::elem() }, finish: finish_cites, }; /// Builds a `ListElem` from grouped `ListItems`s. static LIST: GroupingRule = list_like_grouping::(); /// Builds an `EnumElem` from grouped `EnumItem`s. static ENUM: GroupingRule = list_like_grouping::(); /// Builds a `TermsElem` from grouped `TermItem`s. static TERMS: GroupingRule = list_like_grouping::(); /// Collects `ListItemLike` elements into a `ListLike` element. const fn list_like_grouping() -> GroupingRule { GroupingRule { priority: 2, tags: false, trigger: |content, _| content.elem() == T::Item::elem(), inner: |content| { let elem = content.elem(); elem == SpaceElem::elem() || elem == ParbreakElem::elem() }, interrupt: |elem| elem == T::elem() || elem == AlignElem::elem(), finish: finish_list_like::, } } /// Processes grouped textual elements. /// /// Specifically, it searches for regex matches in grouped textual elements and /// - if there was a match, visits the results recursively, /// - if there was no match, tries to simply implicitly use the grouped elements /// as part of a paragraph grouping, /// - if that's not possible because another grouping is active, temporarily /// disables textual grouping and revisits the elements. fn finish_textual(Grouped { s, mut start }: Grouped) -> SourceResult<()> { // Try to find a regex match in the grouped textual elements. Returns early // if there is one. if visit_textual(s, start)? { return Ok(()); } // There was no regex match, so we need to collect the text into a paragraph // grouping. To do that, we first terminate all non-paragraph groupings. if in_non_par_grouping(s) { let elems = s.store_slice(&s.sink[start..]); s.sink.truncate(start); finish_grouping_while(s, in_non_par_grouping)?; start = s.sink.len(); s.sink.extend(elems); } // Now, there are only two options: // 1. We are already in a paragraph group. In this case, the elements just // transparently become part of it. // 2. There is no group at all. In this case, we create one. if s.groupings.is_empty() && s.rules.iter().any(|&rule| std::ptr::eq(rule, &PAR)) { s.groupings.push(Grouping { start, rule: &PAR }); } Ok(()) } /// Whether there is an active grouping, but it is not a `PAR` grouping. fn in_non_par_grouping(s: &State) -> bool { s.groupings .last() .is_some_and(|grouping| !std::ptr::eq(grouping.rule, &PAR)) } /// Builds the `ParElem` from inline-level elements. fn finish_par(mut grouped: Grouped) -> SourceResult<()> { // Collapse unsupported spaces in-place. let (sink, start) = grouped.get_mut(); collapse_spaces(sink, start); // Collect the children. let elems = grouped.get(); let span = select_span(elems); let (children, trunk) = StyleVec::create(elems); // Create and visit the paragraph. let s = grouped.end(); let elem = ParElem::new(children).pack().spanned(span); visit(s, s.store(elem), trunk) } /// Builds the `CiteGroup` from `CiteElem`s. fn finish_cites(grouped: Grouped) -> SourceResult<()> { // Collect the children. let elems = grouped.get(); let span = select_span(elems); let trunk = elems[0].1; let children = elems .iter() .filter_map(|(c, _)| c.to_packed::()) .cloned() .collect(); // Create and visit the citation group. let s = grouped.end(); let elem = CiteGroup::new(children).pack().spanned(span); visit(s, s.store(elem), trunk) } /// Builds the `ListLike` element from `ListItemLike` elements. fn finish_list_like(grouped: Grouped) -> SourceResult<()> { // Collect the children. let elems = grouped.get(); let span = select_span(elems); let tight = !elems.iter().any(|(c, _)| c.is::()); let styles = elems.iter().filter(|(c, _)| c.is::()).map(|&(_, s)| s); let trunk = StyleChain::trunk(styles).unwrap(); let trunk_depth = trunk.links().count(); let children = elems .iter() .copied() .filter_map(|(c, s)| { let item = c.to_packed::()?.clone(); let local = s.suffix(trunk_depth); Some(T::Item::styled(item, local)) }) .collect(); // Create and visit the list. let s = grouped.end(); let elem = T::create(children, tight).pack().spanned(span); visit(s, s.store(elem), trunk) } /// Visit textual elements in `s.sink[start..]` and apply regex show rules to /// them. fn visit_textual(s: &mut State, start: usize) -> SourceResult { // Try to find a regex match in the grouped textual elements. if let Some(m) = find_regex_match_in_elems(s, &s.sink[start..]) { collapse_spaces(&mut s.sink, start); let elems = s.store_slice(&s.sink[start..]); s.sink.truncate(start); visit_regex_match(s, &elems, m)?; return Ok(true); } Ok(false) } /// Finds the leftmost regex match for this style chain in the given textual /// elements. /// /// Collects the element's merged textual representation into the bump arena. /// This merging also takes into account space collapsing so that we don't need /// to call `collapse_spaces` on every textual group, performing yet another /// linear pass. We only collapse the spaces elements themselves on the cold /// path where there is an actual match. fn find_regex_match_in_elems<'a>( s: &State, elems: &[Pair<'a>], ) -> Option> { let mut buf = BumpString::new_in(&s.arenas.bump); let mut base = 0; let mut leftmost = None; let mut current = StyleChain::default(); let mut space = SpaceState::Destructive; for &(content, styles) in elems { if content.is::() { continue; } let linebreak = content.is::(); if linebreak { if let SpaceState::Space(_) = space { buf.pop(); } } if styles != current && !buf.is_empty() { leftmost = find_regex_match_in_str(&buf, current); if leftmost.is_some() { break; } base += buf.len(); buf.clear(); } current = styles; space = if content.is::() { if space != SpaceState::Supportive { continue; } buf.push(' '); SpaceState::Space(0) } else if linebreak { buf.push('\n'); SpaceState::Destructive } else if let Some(elem) = content.to_packed::() { buf.push(if elem.double(styles) { '"' } else { '\'' }); SpaceState::Supportive } else if let Some(elem) = content.to_packed::() { buf.push_str(&elem.text); SpaceState::Supportive } else { panic!("tried to find regex match in non-textual elements"); }; } if leftmost.is_none() { leftmost = find_regex_match_in_str(&buf, current); } leftmost.map(|m| RegexMatch { offset: base + m.offset, ..m }) } /// Finds the leftmost regex match for this style chain in the given text. fn find_regex_match_in_str<'a>( text: &str, styles: StyleChain<'a>, ) -> Option> { let mut r = 0; let mut revoked = SmallBitSet::new(); let mut leftmost: Option<(regex::Match, RecipeIndex, &Recipe)> = None; let depth = LazyCell::new(|| styles.recipes().count()); for entry in styles.entries() { let recipe = match &**entry { Style::Recipe(recipe) => recipe, Style::Property(_) => continue, Style::Revocation(index) => { revoked.insert(index.0); continue; } }; r += 1; let Some(Selector::Regex(regex)) = recipe.selector() else { continue }; let Some(m) = regex.find(text) else { continue }; // Make sure we don't get any empty matches. if m.range().is_empty() { continue; } // If we already have a match that is equally or more to the left, we're // not interested in this new match. if leftmost.is_some_and(|(p, ..)| p.start() <= m.start()) { continue; } // Check whether the rule is already revoked. Do it only now to not // compute the depth unnecessarily. We subtract 1 from r because we // already incremented it. let index = RecipeIndex(*depth - (r - 1)); if revoked.contains(index.0) { continue; } leftmost = Some((m, index, recipe)); } leftmost.map(|(m, id, recipe)| RegexMatch { offset: m.start(), text: m.as_str().into(), id, recipe, styles, }) } /// Visit a match of a regular expression. /// /// This first revisits all elements before the match, potentially slicing up /// a text element, then the transformed match, and then the remaining elements /// after the match. fn visit_regex_match<'a>( s: &mut State<'a, '_, '_, '_>, elems: &[Pair<'a>], m: RegexMatch<'a>, ) -> SourceResult<()> { let match_range = m.offset..m.offset + m.text.len(); let piece = TextElem::packed(m.text); let context = Context::new(None, Some(m.styles)); let output = m.recipe.apply(s.engine, context.track(), piece)?; let mut cursor = 0; let mut output = Some(output); let mut visit_unconsumed_match = |s: &mut State<'a, '_, '_, '_>| -> SourceResult<()> { if let Some(output) = output.take() { let revocation = Style::Revocation(m.id).into(); let outer = s.arenas.bump.alloc(m.styles); let chained = outer.chain(s.arenas.styles.alloc(revocation)); visit(s, s.store(output), chained)?; } Ok(()) }; for &(content, styles) in elems { // Just forward tags. if content.is::() { visit(s, content, styles)?; continue; } // At this point, we can have a `TextElem`, `SpaceElem`, // `LinebreakElem`, or `SmartQuoteElem`. We now determine the range of // the element. let len = content.to_packed::().map_or(1, |elem| elem.text.len()); let elem_range = cursor..cursor + len; // If the element starts before the start of match, visit it fully or // sliced. if elem_range.start < match_range.start { if elem_range.end <= match_range.start { visit(s, content, styles)?; } else { let mut elem = content.to_packed::().unwrap().clone(); elem.text = elem.text[..match_range.start - elem_range.start].into(); visit(s, s.store(elem.pack()), styles)?; } } // When the match starts before this element ends, visit it. if match_range.start < elem_range.end { visit_unconsumed_match(s)?; } // If the element ends after the end of the match, visit if fully or // sliced. if elem_range.end > match_range.end { if elem_range.start >= match_range.end { visit(s, content, styles)?; } else { let mut elem = content.to_packed::().unwrap().clone(); elem.text = elem.text[match_range.end - elem_range.start..].into(); visit(s, s.store(elem.pack()), styles)?; } } cursor = elem_range.end; } // If the match wasn't consumed yet, visit it. This shouldn't really happen // in practice (we'd need to have an empty match at the end), but it's an // extra fail-safe. visit_unconsumed_match(s)?; Ok(()) } /// Collapses all spaces within `buf[start..]` that are at the edges or in the /// vicinity of destructive elements. fn collapse_spaces(buf: &mut Vec, start: usize) { let mut state = SpaceState::Destructive; let mut k = start; // We do one pass over the elements, backshifting everything as necessary // when a space collapses. The variable `i` is our cursor in the original // elements. The variable `k` is our cursor in the result. At all times, we // have `k <= i`, so we can do it in place. for i in start..buf.len() { let (content, styles) = buf[i]; // Determine the next state. if content.is::() { // Nothing to do. } else if content.is::() { if state != SpaceState::Supportive { continue; } state = SpaceState::Space(k); } else if content.is::() { destruct_space(buf, &mut k, &mut state); } else if let Some(elem) = content.to_packed::() { if elem.amount.is_fractional() || elem.weak(styles) { destruct_space(buf, &mut k, &mut state); } } else { state = SpaceState::Supportive; }; // Copy over normal elements (in place). if k < i { buf[k] = buf[i]; } k += 1; } destruct_space(buf, &mut k, &mut state); // Delete all the excess that's left due to the gaps produced by spaces. buf.truncate(k); } /// Deletes a preceding space if any. fn destruct_space(buf: &mut [Pair], end: &mut usize, state: &mut SpaceState) { if let SpaceState::Space(s) = *state { buf.copy_within(s + 1..*end, s); *end -= 1; } *state = SpaceState::Destructive; } /// Finds the first non-detached span in the list. fn select_span(children: &[Pair]) -> Span { Span::find(children.iter().map(|(c, _)| c.span())) }