typst/crates/typst-realize/src/lib.rs

//! Typst's realization subsystem.
//!
//! *Realization* is the process of recursively applying styling and, in
//! particular, show rules to produce well-known elements that can be processed
//! further.

use std::borrow::Cow;
use std::cell::LazyCell;

use arrayvec::ArrayVec;
use bumpalo::collections::{String as BumpString, Vec as BumpVec};
use comemo::Track;
use ecow::EcoString;
use typst_library::diag::{bail, At, SourceResult};
use typst_library::engine::Engine;
use typst_library::foundations::{
    Content, Context, ContextElem, Element, NativeElement, Recipe, RecipeIndex, Selector,
    SequenceElem, Show, ShowSet, Style, StyleChain, StyledElem, Styles, SymbolElem,
    Synthesize, Transformation,
};
use typst_library::html::{tag, HtmlElem};
use typst_library::introspection::{Locatable, SplitLocator, Tag, TagElem};
use typst_library::layout::{
    AlignElem, BoxElem, HElem, InlineElem, PageElem, PagebreakElem, VElem,
};
use typst_library::math::{EquationElem, Mathy};
use typst_library::model::{
    CiteElem, CiteGroup, DocumentElem, EnumElem, ListElem, ListItemLike, ListLike,
    ParElem, ParbreakElem, TermsElem,
};
use typst_library::routines::{Arenas, FragmentKind, Pair, RealizationKind};
use typst_library::text::{LinebreakElem, SmartQuoteElem, SpaceElem, TextElem};
use typst_syntax::Span;
use typst_utils::{SliceExt, SmallBitSet};

/// Realizes `content` under `styles` into a flat list of well-known elements,
/// each paired with the style chain that applies to it.
///
/// The `kind` decides which set of grouping rules is used. Only document
/// layout starts out in the "outside" state.
#[typst_macros::time(name = "realize")]
pub fn realize<'a>(
    kind: RealizationKind,
    engine: &mut Engine,
    locator: &mut SplitLocator,
    arenas: &'a Arenas,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<Vec<Pair<'a>>> {
    // Select the grouping rules that belong to this kind of realization.
    let rules = match kind {
        RealizationKind::LayoutDocument(_) | RealizationKind::LayoutFragment(_) => {
            LAYOUT_RULES
        }
        RealizationKind::LayoutPar => LAYOUT_PAR_RULES,
        RealizationKind::HtmlDocument(_) => HTML_DOCUMENT_RULES,
        RealizationKind::HtmlFragment(_) => HTML_FRAGMENT_RULES,
        RealizationKind::Math => MATH_RULES,
    };

    // Only a laid out document starts outside of any container.
    let outside = matches!(kind, RealizationKind::LayoutDocument(_));

    let mut state = State {
        kind,
        engine,
        locator,
        arenas,
        sink: vec![],
        rules,
        groupings: ArrayVec::new(),
        outside,
        may_attach: false,
        saw_parbreak: false,
    };

    // Process the content and then close all groupings that are still open.
    visit(&mut state, content, styles)?;
    finish(&mut state)?;

    Ok(state.sink)
}

/// Mutable state for realization.
///
/// Sadly, we need that many lifetimes because &mut references are invariant and
/// it would force the lifetimes of e.g. engine and locator to be equal if they
/// shared a lifetime. We can get around it by enforcing the lifetimes on
/// `fn realize`, but that makes it less flexible on the call site, which isn't
/// worth it.
///
/// The only interesting lifetime is 'a, which is that of the content that comes
/// in and goes out. It's the same 'a as on `fn realize`.
struct State<'a, 'x, 'y, 'z> {
    /// Defines what kind of realization we are performing.
    kind: RealizationKind<'x>,
    /// The engine.
    engine: &'x mut Engine<'y>,
    /// Assigns unique locations to elements.
    locator: &'x mut SplitLocator<'z>,
    /// Temporary storage arenas for lifetime extension during realization.
    arenas: &'a Arenas,
    /// The output elements of well-known types. While a grouping is active,
    /// its elements are also accumulated here, beginning at the grouping's
    /// `start` index.
    sink: Vec<Pair<'a>>,
    /// Grouping rules used for realization.
    rules: &'x [&'x GroupingRule],
    /// Currently active groupings. The innermost grouping is the last one.
    groupings: ArrayVec<Grouping<'x>, MAX_GROUP_NESTING>,
    /// Whether we are currently not within any container or show rule output.
    /// This is used to determine page styles during layout.
    outside: bool,
    /// Whether attach spacing that follows at this point can survive. This
    /// is the case when the last element that passed the filter rules was a
    /// paragraph; a paragraph break resets it.
    may_attach: bool,
    /// Whether we visited any paragraph breaks.
    saw_parbreak: bool,
}

/// Defines a rule for how certain elements shall be grouped during realization.
///
/// A rule consists of plain function pointers, so that rules can be defined as
/// statics and shared by reference.
struct GroupingRule {
    /// When an element is visited that matches a rule with higher priority
    /// than one that is currently grouped, we start a nested group.
    priority: u8,
    /// Whether the grouping handles tags itself. If this is set to `false`,
    /// realization will transparently take care of tags and they will not
    /// be visible to `finish`.
    tags: bool,
    /// Defines which kinds of elements start and make up this kind of grouping.
    trigger: fn(&Content, &RealizationKind) -> bool,
    /// Defines elements that may appear in the interior of the grouping, but
    /// not at the edges. Trailing elements that don't match `trigger` are
    /// split off again before the grouping is finished.
    inner: fn(&Content) -> bool,
    /// Defines whether styles for this kind of element interrupt the grouping.
    interrupt: fn(Element) -> bool,
    /// Should convert the accumulated elements in `s.sink[start..]` into
    /// the grouped element. The state and the start index are passed in
    /// through `Grouped`.
    finish: fn(Grouped) -> SourceResult<()>,
}

/// A started grouping of some elements.
///
/// The elements of the grouping are not stored here, but live in `s.sink`
/// from `start` onwards.
struct Grouping<'a> {
    /// The position in `s.sink` where the group starts.
    start: usize,
    /// Only applies to `PAR` grouping: Whether this paragraph group is
    /// interrupted, but not yet finished because it may be ignored due to being
    /// fully inline. An interrupted grouping does not accept further elements.
    interrupted: bool,
    /// The rule used for this grouping.
    rule: &'a GroupingRule,
}

/// The result of grouping.
///
/// This is what a `GroupingRule`'s `finish` function receives: The state plus
/// the position from which on the sink holds the grouped elements.
struct Grouped<'a, 'x, 'y, 'z, 's> {
    /// The realization state.
    s: &'s mut State<'a, 'x, 'y, 'z>,
    /// The position in `s.sink` where the group starts.
    start: usize,
}

/// What to do with an element when encountering it during realization.
///
/// Produced by `verdict` and consumed by `visit_show_rules`.
struct Verdict<'a> {
    /// Whether the element is already prepared (i.e. things that should only
    /// happen once have happened).
    prepared: bool,
    /// A map of styles to apply to the element. It is filled from matching
    /// show-set rules.
    map: Styles,
    /// An optional show rule transformation to apply to the element.
    step: Option<ShowStep<'a>>,
}

/// A show rule transformation to apply to the element.
enum ShowStep<'a> {
    /// A user-defined transformational show rule, together with the index
    /// that is used to guard the element against repeated application of the
    /// same recipe.
    Recipe(&'a Recipe, RecipeIndex),
    /// The built-in show rule.
    Builtin,
}

/// A match of a regex show rule.
///
/// The matched text is owned, while the styles and the recipe are borrowed for
/// the lifetime of the realized content.
struct RegexMatch<'a> {
    /// The offset in the string that matched.
    offset: usize,
    /// The text that matched.
    text: EcoString,
    /// The style chain of the matching grouping.
    styles: StyleChain<'a>,
    /// The index of the recipe that matched.
    id: RecipeIndex,
    /// The recipe that matched.
    recipe: &'a Recipe,
}

/// State kept for space collapsing.
///
/// NOTE(review): presumably this is driven by `collapse_spaces`, which is
/// defined further down in the file.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
enum SpaceState {
    /// A following space will be collapsed.
    Destructive,
    /// A following space will be kept unless a destructive element follows.
    Supportive,
    /// A space exists at this index.
    Space(usize),
}

impl<'a> State<'a, '_, '_, '_> {
    /// Moves the content into the content arena, so that it lives as long as
    /// the realization output.
    fn store(&self, content: Content) -> &'a Content {
        let arena = &self.arenas.content;
        arena.alloc(content)
    }

    /// Copies the pairs into a vector that is backed by the bump arena.
    ///
    /// A `BumpVec` is preferred over `alloc_slice_copy` here: If nothing else
    /// was bump-allocated by the time the `BumpVec` is dropped, its space
    /// can be reused.
    fn store_slice(&self, pairs: &[Pair<'a>]) -> BumpVec<'a, Pair<'a>> {
        let mut stored = BumpVec::new_in(&self.arenas.bump);
        stored.extend_from_slice_copy(pairs);
        stored
    }
}

impl<'a, 'x, 'y, 'z, 's> Grouped<'a, 'x, 'y, 'z, 's> {
    /// Returns the grouped elements, i.e. the part of the sink that starts at
    /// the group's start position.
    fn get(&self) -> &[Pair<'a>] {
        let from = self.start;
        &self.s.sink[from..]
    }

    /// Returns the whole sink mutably, alongside the position at which the
    /// grouped elements start.
    fn get_mut(&mut self) -> (&mut Vec<Pair<'a>>, usize) {
        let from = self.start;
        (&mut self.s.sink, from)
    }

    /// Drops the grouped elements from the sink and hands back the state, so
    /// that the elements resulting from the grouping can be visited with it.
    fn end(self) -> &'s mut State<'a, 'x, 'y, 'z> {
        let Self { s, start } = self;
        s.sink.truncate(start);
        s
    }
}

/// Processes a single piece of content during realization.
///
/// The content runs through a fixed pipeline: math rules, show rules,
/// recursion into sequences and styled elements, grouping rules and filter
/// rules. The first stage that handles the content ends the processing. If no
/// stage handles it, the content is pushed to the sink as is.
fn visit<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<()> {
    // Tags need no processing at all.
    if content.is::<TagElem>() {
        s.sink.push((content, styles));
        return Ok(());
    }

    // First, the math transformations that depend on the realization kind
    // (these must run before show rules). Then, show rules and preparation.
    if visit_math_rules(s, content, styles)? || visit_show_rules(s, content, styles)? {
        return Ok(());
    }

    // Sequences are visited child by child. As sequences and styled elements
    // can currently carry labels, they are only handled after the show rules.
    if let Some(sequence) = content.to_packed::<SequenceElem>() {
        return sequence
            .children
            .iter()
            .try_for_each(|child| visit(s, child, styles));
    }

    // Styled elements are unwrapped and visited with their styles.
    if let Some(styled) = content.to_packed::<StyledElem>() {
        return visit_styled(s, &styled.child, Cow::Borrowed(&styled.styles), styles);
    }

    // Grouping collects multiple elements and processes them together
    // (typically turning them into a single element). After that, some
    // elements are dropped depending on the circumstances.
    let handled = visit_grouping_rules(s, content, styles)?
        || visit_filter_rules(s, content, styles)?;

    // If nothing wanted the element, it goes to the output unchanged.
    if !handled {
        s.sink.push((content, styles));
    }

    Ok(())
}

/// Handles special cases for math in normal content and for nested equations
/// and textual show rules in math. Returns `true` if the element was handled.
fn visit_math_rules<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<bool> {
    if matches!(s.kind, RealizationKind::Math) {
        // Equations nested in math are entered transparently, so that things
        // like this work:
        // ```
        // #let my = $pi$
        // $ my r^2 $
        // ```
        if let Some(equation) = content.to_packed::<EquationElem>() {
            visit(s, &equation.body, styles)?;
            return Ok(true);
        }

        // Normal realization applies regex show rules to runs of textual
        // elements through the `TEXTUAL` grouping. In math, this is not
        // desirable, so each symbol or text element is matched on its own.
        let mut utf8 = [0; 4];
        let found = if let Some(symbol) = content.to_packed::<SymbolElem>() {
            find_regex_match_in_str(symbol.text.encode_utf8(&mut utf8), styles)
        } else if let Some(text) = content.to_packed::<TextElem>() {
            find_regex_match_in_str(&text.text, styles)
        } else {
            None
        };

        return match found {
            Some(m) => {
                visit_regex_match(s, &[(content, styles)], m)?;
                Ok(true)
            }
            None => Ok(false),
        };
    }

    // Outside of math, mathy content is transparently wrapped in an equation.
    if content.can::<dyn Mathy>() && !content.is::<EquationElem>() {
        let equation = EquationElem::new(content.clone()).pack().spanned(content.span());
        let stored = s.store(equation);
        visit(s, stored, styles)?;
        return Ok(true);
    }

    // Outside of math, symbols transparently turn into `TextElem`s, so that
    // non-math layout doesn't have to deal with them.
    if let Some(symbol) = content.to_packed::<SymbolElem>() {
        let mut text = TextElem::packed(symbol.text).spanned(symbol.span());
        if let Some(label) = symbol.label() {
            text.set_label(label);
        }
        let stored = s.store(text);
        visit(s, stored, styles)?;
        return Ok(true);
    }

    Ok(false)
}

/// Tries to apply show rules to or prepare content. Returns `true` if the
/// element was handled.
///
/// If there is a verdict for the content, the (potentially prepared and
/// transformed) result is visited recursively with the collected styles,
/// enclosed by the start and end tag if preparation produced tags.
fn visit_show_rules<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<bool> {
    // Determines whether and how to proceed with show rule application.
    let Some(Verdict { prepared, mut map, step }) = verdict(s.engine, content, styles)
    else {
        return Ok(false);
    };

    // Start out by borrowing the content. It is only cloned (through
    // `Cow::to_mut`) if we actually need to mutate it.
    let mut output = Cow::Borrowed(content);

    // If the element isn't yet prepared (we're seeing it for the first time),
    // prepare it.
    let mut tags = None;
    if !prepared {
        tags = prepare(s.engine, s.locator, output.to_mut(), &mut map, styles)?;
    }

    // Apply a show rule step, if there is one.
    if let Some(step) = step {
        let chained = styles.chain(&map);
        let result = match step {
            // Apply a user-defined show rule. The element is guarded so that
            // the same recipe is not applied to it again.
            ShowStep::Recipe(recipe, guard) => {
                let context = Context::new(output.location(), Some(chained));
                recipe.apply(
                    s.engine,
                    context.track(),
                    output.into_owned().guarded(guard),
                )
            }

            // Apply a built-in show rule.
            ShowStep::Builtin => {
                output.with::<dyn Show>().unwrap().show(s.engine, chained)
            }
        };

        // Errors in show rules don't terminate compilation immediately. We just
        // continue with empty content for them and show all errors together, if
        // they remain by the end of the introspection loop.
        //
        // This way, we can ignore errors that only occur in earlier iterations
        // and also show more useful errors at once.
        output = Cow::Owned(s.engine.delay(result));
    }

    // Lifetime-extend the realized content if necessary.
    let realized = match output {
        Cow::Borrowed(realized) => realized,
        Cow::Owned(realized) => s.store(realized),
    };

    // Push start tag.
    let (start, end) = tags.unzip();
    if let Some(tag) = start {
        visit(s, s.store(TagElem::packed(tag)), styles)?;
    }

    // While visiting the realized content, we only remain "outside" if we
    // were outside before and the content is a context element. Moreover, the
    // route depth is increased and checked for the duration of the visit.
    //
    // NOTE(review): `outside` and the route depth are not restored when one
    // of the `?` below returns early.
    let prev_outside = s.outside;
    s.outside &= content.is::<ContextElem>();
    s.engine.route.increase();
    s.engine.route.check_show_depth().at(content.span())?;

    visit_styled(s, realized, Cow::Owned(map), styles)?;

    s.outside = prev_outside;
    s.engine.route.decrease();

    // Push end tag.
    if let Some(tag) = end {
        visit(s, s.store(TagElem::packed(tag)), styles)?;
    }

    Ok(true)
}

/// Inspects a target element and the current styles and determines how to
/// proceed with the styling.
///
/// Returns `None` if there is nothing to do for the element, i.e. if there is
/// neither a show step nor a show-set style to apply and the element also
/// doesn't need preparation.
fn verdict<'a>(
    engine: &mut Engine,
    target: &'a Content,
    styles: StyleChain<'a>,
) -> Option<Verdict<'a>> {
    let prepared = target.is_prepared();
    let mut map = Styles::new();
    let mut step = None;

    // Do pre-synthesis on a cloned element to be able to match on synthesized
    // fields before real synthesis runs (during preparation). It's really
    // unfortunate that we have to do this, but otherwise
    // `show figure.where(kind: table)` won't work :(
    //
    // Errors during pre-synthesis are deliberately dropped here (`.ok()`).
    let mut target = target;
    let mut slot;
    if !prepared && target.can::<dyn Synthesize>() {
        slot = target.clone();
        slot.with_mut::<dyn Synthesize>()
            .unwrap()
            .synthesize(engine, styles)
            .ok();
        target = &slot;
    }

    // Lazily computes the total number of recipes in the style chain. We need
    // it to determine whether a particular show rule was already applied to the
    // `target` previously. For this purpose, show rules are indexed from the
    // top of the chain as the chain might grow to the bottom.
    let depth = LazyCell::new(|| styles.recipes().count());

    for (r, recipe) in styles.recipes().enumerate() {
        // We're not interested in recipes that don't match.
        if !recipe
            .selector()
            .is_some_and(|selector| selector.matches(target, Some(styles)))
        {
            continue;
        }

        // Special handling for show-set rules: Their styles are collected into
        // the map, but only the first time the element is seen.
        if let Transformation::Style(transform) = recipe.transform() {
            if !prepared {
                map.apply(transform.clone());
            }
            continue;
        }

        // If we already have a show step, don't look for one.
        if step.is_some() {
            continue;
        }

        // Check whether this show rule was already applied to the target.
        let index = RecipeIndex(*depth - r);
        if target.is_guarded(index) {
            continue;
        }

        // We'll apply this recipe.
        step = Some(ShowStep::Recipe(recipe, index));

        // If we found a show rule and are already prepared, there is nothing
        // else to do, so we can just break. If we are not yet prepared,
        // continue searching for potential show-set styles.
        if prepared {
            break;
        }
    }

    // If we found no user-defined rule, also consider the built-in show rule.
    if step.is_none() && target.can::<dyn Show>() {
        step = Some(ShowStep::Builtin);
    }

    // If there's nothing to do, there is also no verdict. For an element that
    // isn't prepared yet, this additionally requires that preparation would
    // have no effect on it.
    if step.is_none()
        && map.is_empty()
        && (prepared || {
            target.label().is_none()
                && target.location().is_none()
                && !target.can::<dyn ShowSet>()
                && !target.can::<dyn Locatable>()
                && !target.can::<dyn Synthesize>()
        })
    {
        return None;
    }

    Some(Verdict { prepared, map, step })
}

/// This is only executed the first time an element is visited.
///
/// Assigns a location (if applicable), applies built-in show-set rules to
/// `map`, synthesizes and materializes fields and finally marks the element
/// as prepared.
///
/// Returns the start and end tag for the element if it has a location.
fn prepare(
    engine: &mut Engine,
    locator: &mut SplitLocator,
    target: &mut Content,
    map: &mut Styles,
    styles: StyleChain,
) -> SourceResult<Option<(Tag, Tag)>> {
    // Generate a location for the element, which uniquely identifies it in
    // the document. This has some overhead, so we only do it for elements
    // that are explicitly marked as locatable and labelled elements.
    //
    // The element could already have a location even if it is not prepared
    // when it stems from a query.
    //
    // The hash is computed before the element is mutated below. It is used
    // both for generating the location and in the end tag.
    let key = typst_utils::hash128(&target);
    if target.location().is_none()
        && (target.can::<dyn Locatable>() || target.label().is_some())
    {
        let loc = locator.next_location(engine.introspector, key);
        target.set_location(loc);
    }

    // Apply built-in show-set rules. User-defined show-set rules are already
    // considered in the map built while determining the verdict.
    if let Some(show_settable) = target.with::<dyn ShowSet>() {
        map.apply(show_settable.show_set(styles));
    }

    // If necessary, generate "synthesized" fields (which are derived from
    // other fields or queries). Do this after show-set so that show-set styles
    // are respected.
    if let Some(synthesizable) = target.with_mut::<dyn Synthesize>() {
        synthesizable.synthesize(engine, styles.chain(map))?;
    }

    // Copy style chain fields into the element itself, so that they are
    // available in rules.
    target.materialize(styles.chain(map));

    // If the element is locatable, create start and end tags to be able to find
    // the element in the frames after layout. Do this after synthesis and
    // materialization, so that it includes the synthesized fields. Do it before
    // marking as prepared so that show-set rules will apply to this element
    // when queried.
    let tags = target
        .location()
        .map(|loc| (Tag::Start(target.clone()), Tag::End(loc, key)));

    // Ensure that this preparation only runs once by marking the element as
    // prepared.
    target.mark_prepared();

    Ok(tags)
}

/// Handles a styled element.
///
/// Visits `content` with the `local` styles chained onto the `outer` style
/// chain. Groupings that are interrupted by the local styles are finished
/// before and after the content. Document and page styles get special
/// treatment: Document styles populate the document info and page styles
/// result in weak pagebreaks around the content.
///
/// Fails if document or page styles occur in a kind of realization that does
/// not allow them.
fn visit_styled<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    mut local: Cow<'a, Styles>,
    outer: StyleChain<'a>,
) -> SourceResult<()> {
    // Nothing to do if the styles are actually empty.
    if local.is_empty() {
        return visit(s, content, outer);
    }

    // Check for document and page styles.
    let mut pagebreak = false;
    for style in local.iter() {
        let Some(elem) = style.element() else { continue };
        if elem == DocumentElem::elem() {
            if let Some(info) = s.kind.as_document_mut() {
                info.populate(&local)
            } else {
                bail!(
                    style.span(),
                    "document set rules are not allowed inside of containers"
                );
            }
        } else if elem == PageElem::elem() {
            if !matches!(s.kind, RealizationKind::LayoutDocument(_)) {
                bail!(
                    style.span(),
                    "page configuration is not allowed inside of containers"
                );
            }

            // When there are page styles, we "break free" from our show rule cage.
            pagebreak = true;
            s.outside = true;
        }
    }

    // If we are not within a container or show rule, mark the styles as
    // "outside". This will allow them to be lifted to the page level.
    if s.outside {
        local = Cow::Owned(local.into_owned().outside());
    }

    // Lifetime-extend the styles if necessary. The outer chain is stored in
    // the bump arena, so that the local styles can be chained onto it.
    let outer = s.arenas.bump.alloc(outer);
    let local = match local {
        Cow::Borrowed(map) => map,
        Cow::Owned(owned) => &*s.arenas.styles.alloc(owned),
    };

    // Generate a weak pagebreak if there is a page interruption. For the
    // starting pagebreak we only want the styles before and including the
    // interruptions, not trailing styles that happen to be in the same `Styles`
    // list, so we trim the local styles.
    if pagebreak {
        let relevant = local
            .as_slice()
            .trim_end_matches(|style| style.element() != Some(PageElem::elem()));
        visit(s, PagebreakElem::shared_weak(), outer.chain(relevant))?;
    }

    // The styles may interrupt active groupings, both when they start to
    // apply and when they stop to apply.
    finish_interrupted(s, local)?;
    visit(s, content, outer.chain(local))?;
    finish_interrupted(s, local)?;

    // Generate a weak "boundary" pagebreak at the end. In comparison to a
    // normal weak pagebreak, the styles of this are ignored during layout, so
    // it doesn't really matter what we use here.
    if pagebreak {
        visit(s, PagebreakElem::shared_boundary(), *outer)?;
    }

    Ok(())
}

/// Tries to group the content in an active group or start a new one if any
/// grouping rule matches. Returns `true` if the element was grouped.
///
/// Active groupings are considered from the innermost outwards: A grouping
/// that neither accepts the element nor has a lower priority than the rule
/// matching the element is finished. This continues until the element is
/// accepted, a nested grouping may be started, or no grouping is left.
///
/// Fails if finishing a grouping fails or if more than 4096 groupings had to
/// be finished for this one element.
fn visit_grouping_rules<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<bool> {
    // The first rule whose trigger matches the element, if any.
    let matching = s.rules.iter().find(|&rule| (rule.trigger)(content, &s.kind));

    // Try to continue or finish an existing grouping.
    let mut i = 0;
    while let Some(active) = s.groupings.last() {
        // Start a nested group if a rule with higher priority matches.
        if matching.is_some_and(|rule| rule.priority > active.rule.priority) {
            break;
        }

        // If the element can be added to the active grouping, do it.
        if !active.interrupted
            && ((active.rule.trigger)(content, &s.kind) || (active.rule.inner)(content))
        {
            s.sink.push((content, styles));
            return Ok(true);
        }

        finish_innermost_grouping(s)?;
        i += 1;
        if i > 4096 {
            // It seems like this case is only hit when there is a cycle between
            // a show rule and a grouping rule. The show rule produces content
            // that is matched by a grouping rule, which is then again processed
            // by the show rule, and so on. The two must be at an equilibrium,
            // otherwise either the "maximum show rule depth" or "maximum
            // grouping depth" errors are triggered.
            //
            // Note that the line continuation below strips the newline and
            // the indentation of the next line, so the separating space must
            // come before the backslash.
            bail!(
                content.span(),
                "maximum realization iterations exceeded";
                hint: "maybe there is a cycle between a show rule that produces content, \
                       which is matched by a grouping rule that triggers the show rule",
            );
        }
    }

    // Start a new grouping.
    if let Some(rule) = matching {
        let start = s.sink.len();
        s.groupings.push(Grouping { start, rule, interrupted: false });
        s.sink.push((content, styles));
        return Ok(true);
    }

    Ok(false)
}

/// Drops elements that should not end up in the sink given the realization
/// kind and the current state. Returns `true` if the element was dropped.
///
/// Also keeps track of paragraph breaks and of whether attach spacing that
/// follows can survive.
fn visit_filter_rules<'a>(
    s: &mut State<'a, '_, '_, '_>,
    content: &'a Content,
    styles: StyleChain<'a>,
) -> SourceResult<bool> {
    // Nothing is filtered in paragraph and math realization.
    match s.kind {
        RealizationKind::LayoutPar | RealizationKind::Math => return Ok(false),
        _ => {}
    }

    // Outside of maths and paragraph realization, spaces that the paragraph
    // grouper did not collect are of no interest.
    if content.is::<SpaceElem>() {
        return Ok(true);
    }

    // A paragraph break is merely a boundary for paragraph grouping. It is
    // not stored, we just take note of it.
    if content.is::<ParbreakElem>() {
        s.may_attach = false;
        s.saw_parbreak = true;
        return Ok(true);
    }

    // Attach spacing collapses unless it immediately follows a paragraph.
    if !s.may_attach {
        let attaches = content
            .to_packed::<VElem>()
            .is_some_and(|spacing| spacing.attach(styles));
        if attaches {
            return Ok(true);
        }
    }

    // Attach spacing that follows can only survive after a paragraph.
    s.may_attach = content.is::<ParElem>();

    Ok(false)
}

/// Finishes all groupings that are still active at the end of realization.
fn finish(s: &mut State) -> SourceResult<()> {
    finish_grouping_while(s, |state| {
        if !is_fully_inline(state) {
            // Keep going as long as there is anything left to finish.
            return !state.groupings.is_empty();
        }

        // This is a fragment realization and all we've got is inline
        // content. Instead of turning it into a paragraph, we mark the
        // fragment as inline, discard the grouping and stop.
        *state.kind.as_fragment_mut().unwrap() = FragmentKind::Inline;
        state.groupings.pop();
        collapse_spaces(&mut state.sink, 0);
        false
    })?;

    // Spaces are top-level in paragraph and math realization, so they are
    // collapsed over the whole sink.
    let top_level_spaces =
        matches!(s.kind, RealizationKind::LayoutPar | RealizationKind::Math);
    if top_level_spaces {
        collapse_spaces(&mut s.sink, 0);
    }

    Ok(())
}

/// Finishes groupings for as long as any active grouping is interrupted by
/// one of the elements that the local styles belong to.
///
/// If the content is fully inline, the outermost grouping is merely marked as
/// interrupted instead of being finished.
fn finish_interrupted(s: &mut State, local: &Styles) -> SourceResult<()> {
    let mut previous = None;
    for style in local.iter() {
        let Some(elem) = style.element() else { continue };

        // Directly consecutive styles for the same element are only
        // considered once.
        if previous.replace(elem) == Some(elem) {
            continue;
        }

        finish_grouping_while(s, |state| {
            let interrupts = state
                .groupings
                .iter()
                .any(|grouping| (grouping.rule.interrupt)(elem));
            if !interrupts {
                return false;
            }

            if is_fully_inline(state) {
                state.groupings[0].interrupted = true;
                return false;
            }

            true
        })?;
    }
    Ok(())
}

/// Finishes the innermost grouping over and over again, for as long as `f`
/// returns `true`.
///
/// Fails with "maximum grouping depth exceeded" if more than 512 groupings
/// were finished.
fn finish_grouping_while<F>(s: &mut State, mut f: F) -> SourceResult<()>
where
    F: FnMut(&mut State) -> bool,
{
    // Finishing a group may produce new content and, in turn, new groupings.
    // In theory, this can go on for a while. The counter makes sure that it
    // can't turn into an infinite loop.
    let mut rounds = 0;
    loop {
        if !f(s) {
            return Ok(());
        }

        finish_innermost_grouping(s)?;
        rounds += 1;
        if rounds > 512 {
            bail!(Span::detached(), "maximum grouping depth exceeded");
        }
    }
}

/// Finishes the currently innermost grouping.
///
/// Trailing elements that don't trigger the grouping are split off, the
/// grouping's finisher runs on what remains and, afterwards, the split-off
/// elements (and tags the grouping was not interested in) are visited again.
///
/// Panics if there is no active grouping.
fn finish_innermost_grouping(s: &mut State) -> SourceResult<()> {
    // The grouping we are interrupting.
    let Grouping { start, rule, .. } = s.groupings.pop().unwrap();

    // Trim trailing non-trigger elements. They are staged in `tail` and
    // removed from the sink.
    let trimmed = s.sink[start..].trim_end_matches(|(c, _)| !(rule.trigger)(c, &s.kind));
    let end = start + trimmed.len();
    let tail = s.store_slice(&s.sink[end..]);
    s.sink.truncate(end);

    // If the grouping is not interested in tags, remove and collect them.
    let mut tags = BumpVec::<Pair>::new_in(&s.arenas.bump);
    if !rule.tags {
        // Compact the group in place: `i` is the read position and `k` the
        // write position. Tags are moved to `tags`, everything else is
        // shifted left over the gaps that the tags leave behind.
        let mut k = start;
        for i in start..end {
            if s.sink[i].0.is::<TagElem>() {
                tags.push(s.sink[i]);
                continue;
            }

            if k < i {
                s.sink[k] = s.sink[i];
            }
            k += 1;
        }
        s.sink.truncate(k);
    }

    // Execute the grouping's finisher rule.
    (rule.finish)(Grouped { s, start })?;

    // Visit the tags and staged elements again.
    for &(content, styles) in tags.iter().chain(&tail) {
        visit(s, content, styles)?;
    }

    Ok(())
}

/// The maximum number of nested groups that are possible. Corresponds to the
/// number of unique priority levels.
///
/// The rules in this file use the priorities 1 (`PAR`), 2 (`CITES` and the
/// list-like groupings) and 3 (`TEXTUAL`). This constant is the capacity of
/// the `groupings` stack in `State`.
const MAX_GROUP_NESTING: usize = 3;

/// Grouping rules used in layout realization.
///
/// The order is relevant: When an element is visited, the first rule in the
/// list whose trigger matches is the one that is considered for it.
static LAYOUT_RULES: &[&GroupingRule] = &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];

/// Grouping rules used in paragraph layout realization. These are the layout
/// rules without `PAR`.
static LAYOUT_PAR_RULES: &[&GroupingRule] = &[&TEXTUAL, &CITES, &LIST, &ENUM, &TERMS];

/// Grouping rules used in HTML root realization.
static HTML_DOCUMENT_RULES: &[&GroupingRule] =
    &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];

/// Grouping rules used in HTML fragment realization.
static HTML_FRAGMENT_RULES: &[&GroupingRule] =
    &[&TEXTUAL, &PAR, &CITES, &LIST, &ENUM, &TERMS];

/// Grouping rules used in math realization. Neither `TEXTUAL` nor `PAR`
/// grouping happens in math.
static MATH_RULES: &[&GroupingRule] = &[&CITES, &LIST, &ENUM, &TERMS];

/// Groups adjacent textual elements for text show rule application.
///
/// This rule has the highest priority of the rules in this file, so a textual
/// grouping can be nested within any of the other groupings.
static TEXTUAL: GroupingRule = GroupingRule {
    priority: 3,
    // Tags within the grouping stay in the sink and are visible to
    // `finish_textual`.
    tags: true,
    trigger: |content, _| {
        let elem = content.elem();
        // Note that `SymbolElem` converts into `TextElem` before textual show
        // rules run, and we apply textual rules to elements manually during
        // math realization, so we don't check for it here.
        elem == TextElem::elem()
            || elem == LinebreakElem::elem()
            || elem == SmartQuoteElem::elem()
    },
    // Spaces may occur between textual elements.
    inner: |content| content.elem() == SpaceElem::elem(),
    // Any kind of style interrupts this kind of grouping since regex show
    // rules cannot match over style changes anyway.
    interrupt: |_| true,
    finish: finish_textual,
};

/// Collects inline-level elements into a `ParElem`.
///
/// This rule has the lowest priority of the rules in this file, so any of the
/// other groupings can be nested within a paragraph grouping.
static PAR: GroupingRule = GroupingRule {
    priority: 1,
    tags: true,
    trigger: |content, kind| {
        let elem = content.elem();
        elem == TextElem::elem()
            || elem == HElem::elem()
            || elem == LinebreakElem::elem()
            || elem == SmartQuoteElem::elem()
            || elem == InlineElem::elem()
            || elem == BoxElem::elem()
            // In HTML realization, HTML elements whose tag is inline by
            // default are also collected.
            || (kind.is_html()
                && content
                    .to_packed::<HtmlElem>()
                    .is_some_and(|elem| tag::is_inline_by_default(elem.tag)))
    },
    // Spaces may occur between inline-level elements.
    inner: |content| content.elem() == SpaceElem::elem(),
    // Paragraph and alignment styles interrupt the grouping.
    interrupt: |elem| elem == ParElem::elem() || elem == AlignElem::elem(),
    finish: finish_par,
};

/// Collects `CiteElem`s into `CiteGroup`s.
static CITES: GroupingRule = GroupingRule {
    priority: 2,
    // Tags are removed before `finish_cites` runs and are visited again
    // afterwards.
    tags: false,
    trigger: |content, _| content.elem() == CiteElem::elem(),
    // Spaces may occur between citations.
    inner: |content| content.elem() == SpaceElem::elem(),
    // Cite group, paragraph and alignment styles interrupt the grouping.
    interrupt: |elem| {
        elem == CiteGroup::elem() || elem == ParElem::elem() || elem == AlignElem::elem()
    },
    finish: finish_cites,
};

/// Builds a `ListElem` from grouped `ListItem`s.
static LIST: GroupingRule = list_like_grouping::<ListElem>();

/// Builds an `EnumElem` from grouped `EnumItem`s.
static ENUM: GroupingRule = list_like_grouping::<EnumElem>();

/// Builds a `TermsElem` from grouped `TermItem`s.
static TERMS: GroupingRule = list_like_grouping::<TermsElem>();

/// Collects `ListItemLike` elements into a `ListLike` element.
///
/// This is a `const fn`, so that it can be used to initialize the rule statics
/// for the individual list-like elements.
const fn list_like_grouping<T: ListLike>() -> GroupingRule {
    GroupingRule {
        priority: 2,
        // Tags are removed before the finisher runs and are visited again
        // afterwards.
        tags: false,
        // Only the item type that belongs to `T` starts and makes up the
        // grouping.
        trigger: |content, _| content.elem() == T::Item::elem(),
        // Both spaces and paragraph breaks may occur between items.
        inner: |content| {
            let elem = content.elem();
            elem == SpaceElem::elem() || elem == ParbreakElem::elem()
        },
        // Styles for the list-like element itself and alignment styles
        // interrupt the grouping.
        interrupt: |elem| elem == T::elem() || elem == AlignElem::elem(),
        finish: finish_list_like::<T>,
    }
}

/// Finishes a grouping of textual elements.
///
/// The grouped elements are first checked against regex show rules:
/// - if there is a match, the results are visited recursively and nothing
///   else happens here,
/// - if there is no match, the elements stay in the sink so that they become
///   part of a paragraph grouping. Any active grouping that is not an
///   uninterrupted `PAR` grouping is finished first, and a new `PAR` grouping
///   is opened if no grouping remains and the active rule set contains `PAR`.
fn finish_textual(Grouped { s, start }: Grouped) -> SourceResult<()> {
    // A regex match takes over completely: its pieces were already visited.
    if visit_textual(s, start)? {
        return Ok(());
    }

    // No match. Before the text can join a paragraph, all non-paragraph
    // groupings have to be closed. The textual elements are taken out of the
    // sink while that happens and re-appended afterwards, so their start
    // position may move.
    let start = if in_non_par_grouping(s) {
        let pending = s.store_slice(&s.sink[start..]);
        s.sink.truncate(start);
        finish_grouping_while(s, in_non_par_grouping)?;
        let resumed = s.sink.len();
        s.sink.extend(pending);
        resumed
    } else {
        start
    };

    // At this point, either a paragraph grouping is still active and simply
    // absorbs the elements, or no grouping is left at all. In the latter case,
    // a paragraph grouping is started at the position of the text, provided
    // that `PAR` is among the rules of this realization.
    let needs_par = s.groupings.is_empty()
        && s.rules.iter().any(|&rule| std::ptr::eq(rule, &PAR));
    if needs_par {
        s.groupings.push(Grouping { start, rule: &PAR, interrupted: false });
    }

    Ok(())
}

/// Whether the last active grouping is anything other than an uninterrupted
/// `PAR` grouping.
///
/// Returns `false` when no grouping is active at all.
fn in_non_par_grouping(s: &mut State) -> bool {
    match s.groupings.last() {
        Some(grouping) => grouping.interrupted || !std::ptr::eq(grouping.rule, &PAR),
        None => false,
    }
}

/// Whether everything realized so far is purely inline.
///
/// This holds when a fragment is being realized, no paragraph break was seen,
/// and there is exactly one active grouping which is a `PAR` grouping that
/// spans the whole sink (with the exception of leading tags).
fn is_fully_inline(s: &State) -> bool {
    if !s.kind.is_fragment() || s.saw_parbreak {
        return false;
    }

    // Anything but a single active grouping rules out full inlineness.
    let [grouping] = s.groupings.as_slice() else {
        return false;
    };

    // Only tags may precede the paragraph grouping in the sink.
    std::ptr::eq(grouping.rule, &PAR)
        && s.sink[..grouping.start].iter().all(|(c, _)| c.is::<TagElem>())
}

/// Turns the grouped inline-level elements into a `ParElem` and visits it.
fn finish_par(mut grouped: Grouped) -> SourceResult<()> {
    // Drop the spaces that must not survive; this edits the sink in place.
    {
        let (sink, start) = grouped.get_mut();
        collapse_spaces(sink, start);
    }

    // Pack the remaining children into a single body and determine the
    // styles shared by all of them.
    let children = grouped.get();
    let (body, trunk) = repack(children);
    let span = select_span(children);
    let par = ParElem::new(body).pack().spanned(span);

    // Close the grouping and visit the paragraph under the shared styles.
    let s = grouped.end();
    visit(s, s.store(par), trunk)
}

/// Turns the grouped `CiteElem`s into a `CiteGroup` and visits it.
fn finish_cites(grouped: Grouped) -> SourceResult<()> {
    let elems = grouped.get();

    // The group is visited with the styles of its first element.
    let (_, trunk) = elems[0];
    let span = select_span(elems);

    // Keep only the citations themselves; other grouped elements are dropped.
    let children = elems
        .iter()
        .filter_map(|(c, _)| c.to_packed::<CiteElem>().cloned())
        .collect();

    // Close the grouping and visit the new citation group.
    let s = grouped.end();
    let group = CiteGroup::new(children).pack().spanned(span);
    visit(s, s.store(group), trunk)
}

/// Turns the grouped `ListItemLike` elements into their `ListLike` container
/// and visits it.
fn finish_list_like<T: ListLike>(grouped: Grouped) -> SourceResult<()> {
    let elems = grouped.get();
    let span = select_span(elems);

    // The list is tight unless a paragraph break is among the grouped
    // elements.
    let tight = elems.iter().all(|(c, _)| !c.is::<ParbreakElem>());

    // Only the items themselves determine the shared style trunk.
    let item_styles = elems
        .iter()
        .filter_map(|&(c, s)| c.is::<T::Item>().then_some(s));
    let trunk = StyleChain::trunk(item_styles).unwrap();
    let trunk_depth = trunk.links().count();

    // Each item keeps the styles that go beyond the shared trunk.
    let children = elems
        .iter()
        .filter_map(|&(c, s)| {
            c.to_packed::<T::Item>()
                .map(|item| T::Item::styled(item.clone(), s.suffix(trunk_depth)))
        })
        .collect();

    // Close the grouping and visit the new list.
    let s = grouped.end();
    let list = T::create(children, tight).pack().spanned(span);
    visit(s, s.store(list), trunk)
}

/// Applies regex show rules to the textual elements in `s.sink[start..]`.
///
/// Returns `Ok(true)` if a match was found. In that case, the elements were
/// removed from the sink and revisited together with the transformed match.
/// Returns `Ok(false)` and leaves the sink untouched if nothing matched.
fn visit_textual(s: &mut State, start: usize) -> SourceResult<bool> {
    let Some(m) = find_regex_match_in_elems(s, &s.sink[start..]) else {
        return Ok(false);
    };

    // The match offsets assume collapsed spaces, so collapse the actual
    // elements before slicing them up.
    collapse_spaces(&mut s.sink, start);

    // Move the elements out of the sink; they re-enter it through the visit.
    let elems = s.store_slice(&s.sink[start..]);
    s.sink.truncate(start);
    visit_regex_match(s, &elems, m)?;

    Ok(true)
}

/// Finds the leftmost regex match for this style chain in the given textual
/// elements.
///
/// Collects the element's merged textual representation into the bump arena.
/// This merging also takes into account space collapsing so that we don't need
/// to call `collapse_spaces` on every textual group, performing yet another
/// linear pass. We only collapse the space elements themselves on the cold
/// path where there is an actual match.
///
/// The text is searched one run of identically styled elements at a time:
/// Whenever the style chain changes, the text accumulated so far is searched
/// with the styles of that run, and the search stops at the first run that
/// yields a match. The `offset` of the returned match is relative to the
/// concatenated text of all runs, not just to the run it was found in.
///
/// # Panics
///
/// Panics if `elems` contains an element that is not a tag, space, linebreak,
/// smart quote, or text element.
fn find_regex_match_in_elems<'a>(
    s: &State,
    elems: &[Pair<'a>],
) -> Option<RegexMatch<'a>> {
    // Text of the current run of identically styled elements.
    let mut buf = BumpString::new_in(&s.arenas.bump);
    // Byte length of all previous runs that were searched without a match.
    let mut base = 0;
    let mut leftmost = None;
    // Style chain of the current run.
    let mut current = StyleChain::default();
    // Starting out destructive means that leading spaces are skipped.
    let mut space = SpaceState::Destructive;

    for &(content, styles) in elems {
        // Tags contribute no text and don't affect the space state.
        if content.is::<TagElem>() {
            continue;
        }

        // A linebreak removes a directly preceding space. This must happen
        // before the run is potentially searched below, so that the space is
        // not part of the searched text.
        let linebreak = content.is::<LinebreakElem>();
        if linebreak {
            if let SpaceState::Space(_) = space {
                buf.pop();
            }
        }

        // On a style change, search the finished run. If it has no match,
        // account for its length in `base` and start a new run.
        if styles != current && !buf.is_empty() {
            leftmost = find_regex_match_in_str(&buf, current);
            if leftmost.is_some() {
                break;
            }
            base += buf.len();
            buf.clear();
        }

        current = styles;
        space = if content.is::<SpaceElem>() {
            // A space only survives directly after a supportive element.
            if space != SpaceState::Supportive {
                continue;
            }
            buf.push(' ');
            // The index carried by `Space` is never read in this function,
            // only the variant matters.
            SpaceState::Space(0)
        } else if linebreak {
            buf.push('\n');
            SpaceState::Destructive
        } else if let Some(elem) = content.to_packed::<SmartQuoteElem>() {
            buf.push(if elem.double(styles) { '"' } else { '\'' });
            SpaceState::Supportive
        } else if let Some(elem) = content.to_packed::<TextElem>() {
            buf.push_str(&elem.text);
            SpaceState::Supportive
        } else {
            panic!("tried to find regex match in non-textual elements");
        };
    }

    // Search the last run, unless the loop already stopped with a match.
    if leftmost.is_none() {
        leftmost = find_regex_match_in_str(&buf, current);
    }

    // Make the offset relative to the start of all elements.
    leftmost.map(|m| RegexMatch { offset: base + m.offset, ..m })
}

/// Finds the leftmost non-empty match of any regex show rule in `styles`
/// within `text`.
///
/// Recipes whose index was revoked by an entry visited earlier in the chain
/// are skipped. If multiple recipes match at the same position, the one that
/// is visited first in the chain wins.
fn find_regex_match_in_str<'a>(
    text: &str,
    styles: StyleChain<'a>,
) -> Option<RegexMatch<'a>> {
    // The total number of recipes in the chain. It is only needed to compute
    // the index of a candidate recipe, so it is computed lazily.
    let total = LazyCell::new(|| styles.recipes().count());
    let mut seen = 0;
    let mut revoked = SmallBitSet::new();
    let mut best: Option<(regex::Match, RecipeIndex, &Recipe)> = None;

    for entry in styles.entries() {
        let recipe = match &**entry {
            Style::Property(_) => continue,
            Style::Revocation(index) => {
                revoked.insert(index.0);
                continue;
            }
            Style::Recipe(recipe) => recipe,
        };

        // Every recipe is counted, no matter which kind of selector it has.
        // The ordinal is the number of recipes visited before this one.
        let ordinal = seen;
        seen += 1;

        let Some(Selector::Regex(regex)) = recipe.selector() else { continue };

        // Empty matches are of no interest.
        let candidate = match regex.find(text) {
            Some(m) if !m.range().is_empty() => m,
            _ => continue,
        };

        // A previous match that starts at the same position or further to the
        // left takes precedence.
        if let Some((prev, ..)) = best {
            if prev.start() <= candidate.start() {
                continue;
            }
        }

        // Only check for revocation now, so that the recipe count is not
        // computed unless there is a viable candidate.
        let index = RecipeIndex(*total - ordinal);
        if revoked.contains(index.0) {
            continue;
        }

        best = Some((candidate, index, recipe));
    }

    best.map(|(m, id, recipe)| RegexMatch {
        offset: m.start(),
        text: m.as_str().into(),
        id,
        recipe,
        styles,
    })
}

/// Visit a match of a regular expression.
///
/// This first revisits all elements before the match, potentially slicing up
/// a text element, then the transformed match, and then the remaining elements
/// after the match.
///
/// The offset and length of `m` are interpreted as byte positions in the
/// concatenated text of `elems`, where tags contribute nothing, text elements
/// contribute their text, symbols contribute their UTF-8 length, and every
/// other element counts as a single byte.
///
/// # Errors
///
/// Fails if applying the recipe or visiting any of the pieces fails.
///
/// # Panics
///
/// Panics if a boundary of the match falls into the interior of an element
/// that is not a `TextElem`.
fn visit_regex_match<'a>(
    s: &mut State<'a, '_, '_, '_>,
    elems: &[Pair<'a>],
    m: RegexMatch<'a>,
) -> SourceResult<()> {
    let match_range = m.offset..m.offset + m.text.len();

    // Replace with the correct intuitive element kind: if matching against a
    // lone symbol, return a `SymbolElem`, otherwise return a newly composed
    // `TextElem`. We should only match against a `SymbolElem` during math
    // realization (`RealizationKind::Math`).
    let piece = match elems {
        &[(lone, _)] if lone.is::<SymbolElem>() => lone.clone(),
        _ => TextElem::packed(m.text),
    };

    // Apply the show rule's transformation to the matched piece.
    let context = Context::new(None, Some(m.styles));
    let output = m.recipe.apply(s.engine, context.track(), piece)?;

    // Byte position of the current element in the concatenated text.
    let mut cursor = 0;
    // Wrapped in an `Option` so that the output is visited at most once, even
    // if the match spans multiple elements.
    let mut output = Some(output);
    let mut visit_unconsumed_match = |s: &mut State<'a, '_, '_, '_>| -> SourceResult<()> {
        if let Some(output) = output.take() {
            // Visit the output under the styles of the match, extended by a
            // revocation of the matched recipe. `find_regex_match_in_str`
            // skips revoked recipes, so the same rule does not match again
            // within its own output.
            let revocation = Style::Revocation(m.id).into();
            let outer = s.arenas.bump.alloc(m.styles);
            let chained = outer.chain(s.arenas.styles.alloc(revocation));
            visit(s, s.store(output), chained)?;
        }
        Ok(())
    };

    for &(content, styles) in elems {
        // Just forward tags.
        if content.is::<TagElem>() {
            visit(s, content, styles)?;
            continue;
        }

        // At this point, we can have a `TextElem`, `SymbolElem`, `SpaceElem`,
        // `LinebreakElem`, or `SmartQuoteElem`. We now determine the range of
        // the element.
        let len = if let Some(elem) = content.to_packed::<TextElem>() {
            elem.text.len()
        } else if let Some(elem) = content.to_packed::<SymbolElem>() {
            elem.text.len_utf8()
        } else {
            1 // The rest are Ascii, so just one byte.
        };
        let elem_range = cursor..cursor + len;

        // If the element starts before the start of match, visit it fully or
        // sliced.
        if elem_range.start < match_range.start {
            if elem_range.end <= match_range.start {
                // The element lies entirely before the match.
                visit(s, content, styles)?;
            } else {
                // The match starts within this element: Only keep the part
                // before the match.
                let mut elem = content.to_packed::<TextElem>().unwrap().clone();
                elem.text = elem.text[..match_range.start - elem_range.start].into();
                visit(s, s.store(elem.pack()), styles)?;
            }
        }

        // When the match starts before this element ends, visit it.
        if match_range.start < elem_range.end {
            visit_unconsumed_match(s)?;
        }

        // If the element ends after the end of the match, visit it fully or
        // sliced.
        if elem_range.end > match_range.end {
            if elem_range.start >= match_range.end {
                // The element lies entirely after the match.
                visit(s, content, styles)?;
            } else {
                // The match ends within this element: Only keep the part after
                // the match.
                let mut elem = content.to_packed::<TextElem>().unwrap().clone();
                elem.text = elem.text[match_range.end - elem_range.start..].into();
                visit(s, s.store(elem.pack()), styles)?;
            }
        }

        cursor = elem_range.end;
    }

    // If the match wasn't consumed yet, visit it. This shouldn't really happen
    // in practice (we'd need to have an empty match at the end), but it's an
    // extra fail-safe.
    visit_unconsumed_match(s)?;

    Ok(())
}

/// Collapses all spaces within `buf[start..]` that are at the edges or in the
/// vicinity of destructive elements.
///
/// More precisely, a space is removed if
/// - it is not directly preceded by a supportive element (which covers leading
///   spaces, consecutive spaces, and spaces after destructive elements), or
/// - it is followed by a linebreak or by an `HElem` that is fractional or
///   weak, or
/// - it is the last element.
///
/// Tags as well as `HElem`s that are neither fractional nor weak are ignored
/// for the purpose of determining what precedes or follows a space.
///
/// The buffer is modified in place and truncated to its new length. Elements
/// before `start` are left untouched.
fn collapse_spaces(buf: &mut Vec<Pair>, start: usize) {
    // Starting out destructive means that leading spaces are removed.
    let mut state = SpaceState::Destructive;
    let mut k = start;

    // We do one pass over the elements, backshifting everything as necessary
    // when a space collapses. The variable `i` is our cursor in the original
    // elements. The variable `k` is our cursor in the result. At all times, we
    // have `k <= i`, so we can do it in place.
    for i in start..buf.len() {
        let (content, styles) = buf[i];

        // Determine the next state.
        if content.is::<TagElem>() {
            // Nothing to do.
        } else if content.is::<SpaceElem>() {
            // Skip the space (without advancing `k`) unless it directly
            // follows a supportive element.
            if state != SpaceState::Supportive {
                continue;
            }
            // Remember the position the space is written to, so that it can
            // still be removed if a destructive element follows.
            state = SpaceState::Space(k);
        } else if content.is::<LinebreakElem>() {
            destruct_space(buf, &mut k, &mut state);
        } else if let Some(elem) = content.to_packed::<HElem>() {
            // Other `HElem`s leave the state as it is.
            if elem.amount.is_fractional() || elem.weak(styles) {
                destruct_space(buf, &mut k, &mut state);
            }
        } else {
            state = SpaceState::Supportive;
        };

        // Copy over normal elements (in place).
        if k < i {
            buf[k] = buf[i];
        }
        k += 1;
    }

    // Remove a trailing space, if any.
    destruct_space(buf, &mut k, &mut state);

    // Delete all the excess that's left due to the gaps produced by spaces.
    buf.truncate(k);
}

/// Removes the pending space, if there is one, and makes the state
/// destructive.
///
/// If `state` is `SpaceState::Space(index)`, the elements in
/// `buf[index + 1..*end]` are shifted one position to the left, overwriting
/// the space at `index`, and `*end` is decremented. In any case, `state` is
/// `SpaceState::Destructive` afterwards.
fn destruct_space(buf: &mut [Pair], end: &mut usize, state: &mut SpaceState) {
    let previous = std::mem::replace(state, SpaceState::Destructive);
    let SpaceState::Space(index) = previous else {
        return;
    };
    buf.copy_within(index + 1..*end, index);
    *end -= 1;
}

/// Picks a span for an element built from `children`: The first non-detached
/// span among the children's spans, as determined by `Span::find`.
fn select_span(children: &[Pair]) -> Span {
    let spans = children.iter().map(|&(content, _)| content.span());
    Span::find(spans)
}

/// Converts realized pairs of content and styles back into a single owned
/// piece of content plus the style chain shared by all pairs (the trunk).
///
/// Styles that go beyond the trunk are re-attached to the content itself:
/// Each run of consecutive pairs with the same style chain is wrapped in the
/// styles it has on top of the trunk. Runs without extra styles are added to
/// the resulting sequence as they are.
fn repack<'a>(buf: &[Pair<'a>]) -> (Content, StyleChain<'a>) {
    let trunk = StyleChain::trunk(buf.iter().map(|&(_, s)| s)).unwrap_or_default();
    let depth = trunk.links().count();

    let mut seq = Vec::with_capacity(buf.len());

    for (chain, group) in buf.group_by_key(|&(_, s)| s) {
        // The styles this run has in addition to the trunk.
        let local = chain.suffix(depth);
        let contents = group.iter().map(|&(c, _)| c.clone());
        match group {
            // Nothing to re-attach: add the elements directly.
            _ if local.is_empty() => seq.extend(contents),
            // A lone element is styled directly, without a wrapping sequence.
            &[(single, _)] => seq.push(single.clone().styled_with_map(local)),
            // Multiple elements are styled together as one sequence.
            _ => seq.push(Content::sequence(contents).styled_with_map(local)),
        }
    }

    (Content::sequence(seq), trunk)
}