From 7905de67bcf3ca9b65c076ca02ec4726ba02d22c Mon Sep 17 00:00:00 2001 From: PgBiel <9021226+PgBiel@users.noreply.github.com> Date: Fri, 10 May 2024 11:47:02 -0300 Subject: [PATCH] Add parameter to select pages to be exported by CLI (#4039) --- crates/typst-cli/src/args.rs | 64 +++++++++++++++++++++++++ crates/typst-cli/src/compile.rs | 44 ++++++++++++++---- crates/typst-pdf/src/lib.rs | 25 ++++++---- crates/typst-pdf/src/outline.rs | 16 ++++++- crates/typst-pdf/src/page.rs | 82 ++++++++++++++++++++++++--------- crates/typst-pdf/src/pattern.rs | 2 +- crates/typst/src/layout/page.rs | 34 ++++++++++++++ tests/src/run.rs | 2 +- 8 files changed, 226 insertions(+), 43 deletions(-) diff --git a/crates/typst-cli/src/args.rs b/crates/typst-cli/src/args.rs index c115d5a9f..f49d35c75 100644 --- a/crates/typst-cli/src/args.rs +++ b/crates/typst-cli/src/args.rs @@ -1,5 +1,8 @@ use std::fmt::{self, Display, Formatter}; +use std::num::NonZeroUsize; +use std::ops::RangeInclusive; use std::path::PathBuf; +use std::str::FromStr; use chrono::{DateTime, Utc}; use clap::builder::ValueParser; @@ -76,6 +79,18 @@ pub struct CompileCommand { #[clap(required_if_eq("input", "-"), value_parser = ValueParser::new(output_value_parser))] pub output: Option, + /// Which pages to export. When unspecified, all document pages are exported. + /// + /// Pages to export are separated by commas, and can be either simple page + /// numbers (e.g. '2,5' to export only pages 2 and 5) or page ranges + /// (e.g. '2,3-6,8-' to export page 2, pages 3 to 6 (inclusive), page 8 and + /// any pages after it). + /// + /// Page numbers are one-indexed and correspond to real page numbers in the + /// document (therefore not being affected by the document's page counter). + #[arg(long = "pages", value_delimiter = ',')] + pub pages: Option>, + /// Output a Makefile rule describing the current compilation #[clap(long = "make-deps", value_name = "PATH")] pub make_deps: Option, @@ -271,6 +286,55 @@ fn parse_input_pair(raw: &str) -> Result<(String, String), String> { Ok((key, val)) } +/// Implements parsing of page ranges (`1-3`, `4`, `5-`, `-2`), used by the +/// `CompileCommand.pages` argument, through the `FromStr` trait instead of +/// a value parser, in order to generate better errors. +/// +/// See also: https://github.com/clap-rs/clap/issues/5065 +#[derive(Debug, Clone)] +pub struct PageRangeArgument(RangeInclusive>); + +impl PageRangeArgument { + pub fn to_range(&self) -> RangeInclusive> { + self.0.clone() + } +} + +impl FromStr for PageRangeArgument { + type Err = &'static str; + + fn from_str(value: &str) -> Result { + match value.split('-').map(str::trim).collect::>().as_slice() { + [] | [""] => Err("page export range must not be empty"), + [single_page] => { + let page_number = parse_page_number(single_page)?; + Ok(PageRangeArgument(Some(page_number)..=Some(page_number))) + } + ["", ""] => Err("page export range must have start or end"), + [start, ""] => Ok(PageRangeArgument(Some(parse_page_number(start)?)..=None)), + ["", end] => Ok(PageRangeArgument(None..=Some(parse_page_number(end)?))), + [start, end] => { + let start = parse_page_number(start)?; + let end = parse_page_number(end)?; + if start > end { + Err("page export range must end at a page after the start") + } else { + Ok(PageRangeArgument(Some(start)..=Some(end))) + } + } + [_, _, _, ..] => Err("page export range must have a single hyphen"), + } + } +} + +fn parse_page_number(value: &str) -> Result { + if value == "0" { + Err("page numbers start at one") + } else { + NonZeroUsize::from_str(value).map_err(|_| "not a valid page number") + } +} + /// Lists all discovered fonts in system and custom font paths #[derive(Debug, Clone, Parser)] pub struct FontsCommand { diff --git a/crates/typst-cli/src/compile.rs b/crates/typst-cli/src/compile.rs index abe8768f9..bf9afc352 100644 --- a/crates/typst-cli/src/compile.rs +++ b/crates/typst-cli/src/compile.rs @@ -7,17 +7,19 @@ use codespan_reporting::diagnostic::{Diagnostic, Label}; use codespan_reporting::term; use ecow::{eco_format, EcoString}; use parking_lot::RwLock; -use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; +use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use typst::diag::{bail, At, Severity, SourceDiagnostic, StrResult}; use typst::eval::Tracer; use typst::foundations::{Datetime, Smart}; -use typst::layout::Frame; +use typst::layout::{Frame, PageRanges}; use typst::model::Document; use typst::syntax::{FileId, Source, Span}; use typst::visualize::Color; use typst::{World, WorldExt}; -use crate::args::{CompileCommand, DiagnosticFormat, Input, Output, OutputFormat}; +use crate::args::{ + CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument, +}; use crate::timings::Timer; use crate::watch::Status; use crate::world::SystemWorld; @@ -60,6 +62,17 @@ impl CompileCommand { OutputFormat::Pdf }) } + + /// The ranges of the pages to be exported as specified by the user. + /// + /// This returns `None` if all pages should be exported. + pub fn exported_page_ranges(&self) -> Option { + self.pages.as_ref().map(|export_ranges| { + PageRanges::new( + export_ranges.iter().map(PageRangeArgument::to_range).collect(), + ) + }) + } } /// Execute a compilation command. @@ -171,7 +184,8 @@ fn export_pdf(document: &Document, command: &CompileCommand) -> StrResult<()> { let timestamp = convert_datetime( command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now), ); - let buffer = typst_pdf::pdf(document, Smart::Auto, timestamp); + let exported_page_ranges = command.exported_page_ranges(); + let buffer = typst_pdf::pdf(document, Smart::Auto, timestamp, exported_page_ranges); command .output() .write(&buffer) @@ -214,7 +228,21 @@ fn export_image( output_template::has_indexable_template(output.to_str().unwrap_or_default()) } }; - if !can_handle_multiple && document.pages.len() > 1 { + + let exported_page_ranges = command.exported_page_ranges(); + + let exported_pages = document + .pages + .iter() + .enumerate() + .filter(|(i, _)| { + exported_page_ranges.as_ref().map_or(true, |exported_page_ranges| { + exported_page_ranges.includes_page_index(*i) + }) + }) + .collect::>(); + + if !can_handle_multiple && exported_pages.len() > 1 { let err = match output { Output::Stdout => "to stdout", Output::Path(_) => { @@ -227,10 +255,8 @@ fn export_image( let cache = world.export_cache(); // The results are collected in a `Vec<()>` which does not allocate. - document - .pages + exported_pages .par_iter() - .enumerate() .map(|(i, page)| { // Use output with converted path. let output = match output { @@ -250,7 +276,7 @@ fn export_image( // If we are not watching, don't use the cache. // If the frame is in the cache, skip it. // If the file does not exist, always create it. - if watching && cache.is_cached(i, &page.frame) && path.exists() { + if watching && cache.is_cached(*i, &page.frame) && path.exists() { return Ok(()); } diff --git a/crates/typst-pdf/src/lib.rs b/crates/typst-pdf/src/lib.rs index c55abcb06..e618f572a 100644 --- a/crates/typst-pdf/src/lib.rs +++ b/crates/typst-pdf/src/lib.rs @@ -21,7 +21,7 @@ use pdf_writer::writers::Destination; use pdf_writer::{Finish, Name, Pdf, Rect, Ref, Str, TextStr}; use typst::foundations::{Datetime, Label, NativeElement, Smart}; use typst::introspection::Location; -use typst::layout::{Abs, Dir, Em, Frame, Transform}; +use typst::layout::{Abs, Dir, Em, Frame, PageRanges, Transform}; use typst::model::{Document, HeadingElem}; use typst::text::color::frame_for_glyph; use typst::text::{Font, Lang}; @@ -55,13 +55,17 @@ use crate::pattern::PdfPattern; /// The `timestamp`, if given, is expected to be the creation date of the /// document as a UTC datetime. It will only be used if `set document(date: ..)` /// is `auto`. +/// +/// The `page_ranges` option specifies which ranges of pages should be exported +/// in the PDF. When `None`, all pages should be exported. #[typst_macros::time(name = "pdf")] pub fn pdf( document: &Document, ident: Smart<&str>, timestamp: Option, + page_ranges: Option, ) -> Vec { - let mut ctx = PdfContext::new(document); + let mut ctx = PdfContext::new(document, page_ranges); page::construct_pages(&mut ctx, &document.pages); font::write_fonts(&mut ctx); image::write_images(&mut ctx); @@ -82,7 +86,10 @@ struct PdfContext<'a> { /// The writer we are writing the PDF into. pdf: Pdf, /// Content of exported pages. - pages: Vec, + pages: Vec>, + /// Page ranges to export. + /// When `None`, all pages are exported. + exported_pages: Option, /// For each font a mapping from used glyphs to their text representation. /// May contain multiple chars in case of ligatures or similar things. The /// same glyph can have a different text representation within one document, @@ -108,8 +115,6 @@ struct PdfContext<'a> { /// dictionary), which Acrobat doesn't appreciate (it fails to parse the /// font) even if the specification seems to allow it. type3_font_resources_ref: Ref, - /// The IDs of written pages. - page_refs: Vec, /// The IDs of written fonts. font_refs: Vec, /// The IDs of written images. @@ -145,7 +150,7 @@ struct PdfContext<'a> { } impl<'a> PdfContext<'a> { - fn new(document: &'a Document) -> Self { + fn new(document: &'a Document, page_ranges: Option) -> Self { let mut alloc = Ref::new(1); let page_tree_ref = alloc.bump(); let global_resources_ref = alloc.bump(); @@ -154,13 +159,13 @@ impl<'a> PdfContext<'a> { document, pdf: Pdf::new(), pages: vec![], + exported_pages: page_ranges, glyph_sets: HashMap::new(), languages: BTreeMap::new(), alloc, page_tree_ref, global_resources_ref, type3_font_resources_ref, - page_refs: vec![], font_refs: vec![], image_refs: vec![], gradient_refs: vec![], @@ -251,7 +256,8 @@ fn write_catalog(ctx: &mut PdfContext, ident: Smart<&str>, timestamp: Option Option { // enforced in the manner shown below. let mut last_skipped_level = None; let elements = ctx.document.introspector.query(&HeadingElem::elem().select()); + for elem in elements.iter() { + if let Some(page_ranges) = &ctx.exported_pages { + if !page_ranges + .includes_page(ctx.document.introspector.page(elem.location().unwrap())) + { + // Don't bookmark headings in non-exported pages + continue; + } + } + let heading = elem.to_packed::().unwrap(); let leaf = HeadingNode::leaf(heading); @@ -166,9 +176,11 @@ fn write_outline_item( let loc = node.element.location().unwrap(); let pos = ctx.document.introspector.position(loc); let index = pos.page.get() - 1; - if let Some(page) = ctx.pages.get(index) { + + // Don't link to non-exported pages. + if let Some(Some(page)) = ctx.pages.get(index) { let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); - outline.dest().page(ctx.page_refs[index]).xyz( + outline.dest().page(page.id).xyz( pos.point.x.to_f32(), (page.size.y - y).to_f32(), None, diff --git a/crates/typst-pdf/src/page.rs b/crates/typst-pdf/src/page.rs index 621ac91fb..1785e98eb 100644 --- a/crates/typst-pdf/src/page.rs +++ b/crates/typst-pdf/src/page.rs @@ -27,20 +27,40 @@ use typst::visualize::{ /// Construct page objects. #[typst_macros::time(name = "construct pages")] pub(crate) fn construct_pages(ctx: &mut PdfContext, pages: &[Page]) { - for page in pages { - let (page_ref, mut encoded) = construct_page(ctx, &page.frame); - encoded.label = page - .numbering + let mut skipped_pages = 0; + for (i, page) in pages.iter().enumerate() { + if ctx + .exported_pages .as_ref() - .and_then(|num| PdfPageLabel::generate(num, page.number)); - ctx.page_refs.push(page_ref); - ctx.pages.push(encoded); + .is_some_and(|ranges| !ranges.includes_page_index(i)) + { + // Don't export this page. + ctx.pages.push(None); + skipped_pages += 1; + } else { + let mut encoded = construct_page(ctx, &page.frame); + encoded.label = page + .numbering + .as_ref() + .and_then(|num| PdfPageLabel::generate(num, page.number)) + .or_else(|| { + // When some pages were ignored from export, we show a page label with + // the correct real (not logical) page number. + // This is for consistency with normal output when pages have no numbering + // and all are exported: the final PDF page numbers always correspond to + // the real (not logical) page numbers. Here, the final PDF page number + // will differ, but we can at least use labels to indicate what was + // the corresponding real page number in the Typst document. + (skipped_pages > 0).then(|| PdfPageLabel::arabic(i + 1)) + }); + ctx.pages.push(Some(encoded)); + } } } /// Construct a page object. #[typst_macros::time(name = "construct page")] -pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, EncodedPage) { +pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> EncodedPage { let page_ref = ctx.alloc.bump(); let size = frame.size(); @@ -60,7 +80,7 @@ pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, Encod // Encode the page into the content stream. write_frame(&mut ctx, frame); - let page = EncodedPage { + EncodedPage { size, content: deflate_deferred(ctx.content.finish()), id: page_ref, @@ -68,21 +88,20 @@ pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, Encod links: ctx.links, label: None, resources: ctx.resources, - }; - - (page_ref, page) + } } /// Write the page tree. pub(crate) fn write_page_tree(ctx: &mut PdfContext) { + let mut refs = vec![]; for i in 0..ctx.pages.len() { - write_page(ctx, i); + write_page(ctx, i, &mut refs); } ctx.pdf .pages(ctx.page_tree_ref) - .count(ctx.page_refs.len() as i32) - .kids(ctx.page_refs.iter().copied()); + .count(refs.len() as i32) + .kids(refs.iter().copied()); } /// Write the global resource dictionary that will be referenced by all pages. @@ -170,10 +189,15 @@ pub(crate) fn write_global_resources(ctx: &mut PdfContext) { } /// Write a page tree node. -fn write_page(ctx: &mut PdfContext, i: usize) { - let page = &ctx.pages[i]; +fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec) { + let Some(page) = &ctx.pages[i] else { + // Page excluded from export. + return; + }; let content_id = ctx.alloc.bump(); + refs.push(page.id); + let mut page_writer = ctx.pdf.page(page.id); page_writer.parent(ctx.page_tree_ref); @@ -225,7 +249,8 @@ fn write_page(ctx: &mut PdfContext, i: usize) { let index = pos.page.get() - 1; let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero()); - if let Some(page) = ctx.pages.get(index) { + // Don't add links to non-exported pages. + if let Some(Some(page)) = ctx.pages.get(index) { annotation .action() .action_type(ActionType::GoTo) @@ -244,9 +269,12 @@ fn write_page(ctx: &mut PdfContext, i: usize) { } /// Write the page labels. +/// They are numbered according to the page's final number, considering pages +/// which were removed from export, and not according to the page's real or +/// logical number in the initial Typst document. pub(crate) fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref)> { - // If there is no page labeled, we skip the writing - if !ctx.pages.iter().any(|p| { + // If there is no exported page labeled, we skip the writing + if !ctx.pages.iter().filter_map(Option::as_ref).any(|p| { p.label .as_ref() .is_some_and(|l| l.prefix.is_some() || l.style.is_some()) @@ -258,7 +286,8 @@ pub(crate) fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref) let empty_label = PdfPageLabel::default(); let mut prev: Option<&PdfPageLabel> = None; - for (i, page) in ctx.pages.iter().enumerate() { + // Skip non-exported pages for numbering. + for (i, page) in ctx.pages.iter().filter_map(Option::as_ref).enumerate() { let nr = NonZeroUsize::new(1 + i).unwrap(); // If there are pages with empty labels between labeled pages, we must // write empty PageLabel entries. @@ -372,6 +401,17 @@ impl PdfPageLabel { let offset = style.and(NonZeroUsize::new(number)); Some(PdfPageLabel { prefix, style, offset }) } + + /// Creates an arabic page label with the specified page number. + /// For example, this will display page label `11` when given the page + /// number 11. + fn arabic(number: usize) -> PdfPageLabel { + PdfPageLabel { + prefix: None, + style: Some(PdfPageLabelStyle::Arabic), + offset: NonZeroUsize::new(number), + } + } } /// Data for an exported page. diff --git a/crates/typst-pdf/src/pattern.rs b/crates/typst-pdf/src/pattern.rs index 5d5942bc9..211c056c1 100644 --- a/crates/typst-pdf/src/pattern.rs +++ b/crates/typst-pdf/src/pattern.rs @@ -116,7 +116,7 @@ fn register_pattern( }; // Render the body. - let (_, content) = construct_page(ctx.parent, pattern.frame()); + let content = construct_page(ctx.parent, pattern.frame()); let mut pdf_pattern = PdfPattern { transform, diff --git a/crates/typst/src/layout/page.rs b/crates/typst/src/layout/page.rs index c354dc81a..85a88e1bd 100644 --- a/crates/typst/src/layout/page.rs +++ b/crates/typst/src/layout/page.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; use std::num::NonZeroUsize; +use std::ops::RangeInclusive; use std::ptr; use std::str::FromStr; @@ -726,6 +727,39 @@ cast! { v: Func => Self::Func(v), } +/// A list of page ranges to be exported. The ranges are one-indexed. +/// For example, `1..=3` indicates the first, second and third pages should be +/// exported. +pub struct PageRanges(Vec); + +pub type PageRange = RangeInclusive>; + +impl PageRanges { + pub fn new(ranges: Vec) -> Self { + Self(ranges) + } + + /// Check if a page, given its number, should be included when exporting the + /// document while restricting the exported pages to these page ranges. + /// This is the one-indexed version of 'includes_page_index'. + pub fn includes_page(&self, page: NonZeroUsize) -> bool { + self.includes_page_index(page.get() - 1) + } + + /// Check if a page, given its index, should be included when exporting the + /// document while restricting the exported pages to these page ranges. + /// This is the zero-indexed version of 'includes_page'. + pub fn includes_page_index(&self, page: usize) -> bool { + let page = NonZeroUsize::try_from(page + 1).unwrap(); + self.0.iter().any(|range| match (range.start(), range.end()) { + (Some(start), Some(end)) => (start..=end).contains(&&page), + (Some(start), None) => (start..).contains(&&page), + (None, Some(end)) => (..=end).contains(&&page), + (None, None) => true, + }) + } +} + /// A manual page break. /// /// Must not be used inside any containers. diff --git a/tests/src/run.rs b/tests/src/run.rs index d0d86ea6b..47760e164 100644 --- a/tests/src/run.rs +++ b/tests/src/run.rs @@ -176,7 +176,7 @@ impl<'a> Runner<'a> { // Write PDF if requested. if crate::ARGS.pdf() { let pdf_path = format!("{}/pdf/{}.pdf", crate::STORE_PATH, self.test.name); - let pdf = typst_pdf::pdf(document, Smart::Auto, None); + let pdf = typst_pdf::pdf(document, Smart::Auto, None, None); std::fs::write(pdf_path, pdf).unwrap(); }