Add parameter to select pages to be exported by CLI (#4039)

This commit is contained in:
PgBiel 2024-05-10 11:47:02 -03:00 committed by GitHub
parent be12762d94
commit 7905de67bc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 226 additions and 43 deletions

View File

@ -1,5 +1,8 @@
use std::fmt::{self, Display, Formatter};
use std::num::NonZeroUsize;
use std::ops::RangeInclusive;
use std::path::PathBuf;
use std::str::FromStr;
use chrono::{DateTime, Utc};
use clap::builder::ValueParser;
@ -76,6 +79,18 @@ pub struct CompileCommand {
#[clap(required_if_eq("input", "-"), value_parser = ValueParser::new(output_value_parser))]
pub output: Option<Output>,
/// Which pages to export. When unspecified, all document pages are exported.
///
/// Pages to export are separated by commas, and can be either simple page
/// numbers (e.g. '2,5' to export only pages 2 and 5) or page ranges
/// (e.g. '2,3-6,8-' to export page 2, pages 3 to 6 (inclusive), page 8 and
/// any pages after it).
///
/// Page numbers are one-indexed and correspond to real page numbers in the
/// document (therefore not being affected by the document's page counter).
#[arg(long = "pages", value_delimiter = ',')]
pub pages: Option<Vec<PageRangeArgument>>,
/// Output a Makefile rule describing the current compilation
#[clap(long = "make-deps", value_name = "PATH")]
pub make_deps: Option<PathBuf>,
@ -271,6 +286,55 @@ fn parse_input_pair(raw: &str) -> Result<(String, String), String> {
Ok((key, val))
}
/// One entry of the `CompileCommand.pages` argument: either a single page
/// (`4`) or a page range (`1-3`, `5-`, `-2`).
///
/// Parsing goes through the `FromStr` implementation rather than a clap
/// value parser, because that produces better error messages.
///
/// See also: <https://github.com/clap-rs/clap/issues/5065>
#[derive(Debug, Clone)]
pub struct PageRangeArgument(RangeInclusive<Option<NonZeroUsize>>);

impl PageRangeArgument {
    /// Returns an owned copy of the wrapped inclusive range.
    ///
    /// A `None` bound means the range is open on that side.
    pub fn to_range(&self) -> RangeInclusive<Option<NonZeroUsize>> {
        let Self(range) = self;
        range.clone()
    }
}
impl FromStr for PageRangeArgument {
    type Err = &'static str;

    /// Parses a single page (`4`) or a page range with at most one hyphen
    /// (`1-3`, `5-`, `-2`). Whitespace around each side of the hyphen is
    /// ignored.
    ///
    /// # Errors
    ///
    /// Returns a static message when the input is empty, has neither a start
    /// nor an end, contains more than one hyphen, ends before it starts, or
    /// contains something that is not a valid page number.
    fn from_str(value: &str) -> Result<Self, Self::Err> {
        let parts: Vec<&str> = value.split('-').map(str::trim).collect();

        // Build the bare range first; it is wrapped once at the end.
        let range = match parts.as_slice() {
            [] | [""] => return Err("page export range must not be empty"),
            [only] => {
                let page = Some(parse_page_number(only)?);
                page..=page
            }
            ["", ""] => return Err("page export range must have start or end"),
            [first, ""] => Some(parse_page_number(first)?)..=None,
            ["", last] => None..=Some(parse_page_number(last)?),
            [first, last] => {
                let first = parse_page_number(first)?;
                let last = parse_page_number(last)?;
                if first > last {
                    return Err("page export range must end at a page after the start");
                }
                Some(first)..=Some(last)
            }
            [_, _, _, ..] => return Err("page export range must have a single hyphen"),
        };

        Ok(PageRangeArgument(range))
    }
}
/// Parses a one-indexed page number.
///
/// The text is first parsed as a plain `usize` and only then checked for
/// being non-zero, so every spelling of zero (`0`, `00`, `+0`, ...) yields
/// the dedicated "page numbers start at one" message instead of the generic
/// one.
///
/// # Errors
///
/// - `"not a valid page number"` if `value` is not an unsigned integer.
/// - `"page numbers start at one"` if `value` denotes zero.
fn parse_page_number(value: &str) -> Result<NonZeroUsize, &'static str> {
    let number = usize::from_str(value).map_err(|_| "not a valid page number")?;
    NonZeroUsize::new(number).ok_or("page numbers start at one")
}
/// Lists all discovered fonts in system and custom font paths
#[derive(Debug, Clone, Parser)]
pub struct FontsCommand {

View File

@ -7,17 +7,19 @@ use codespan_reporting::diagnostic::{Diagnostic, Label};
use codespan_reporting::term;
use ecow::{eco_format, EcoString};
use parking_lot::RwLock;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use typst::diag::{bail, At, Severity, SourceDiagnostic, StrResult};
use typst::eval::Tracer;
use typst::foundations::{Datetime, Smart};
use typst::layout::Frame;
use typst::layout::{Frame, PageRanges};
use typst::model::Document;
use typst::syntax::{FileId, Source, Span};
use typst::visualize::Color;
use typst::{World, WorldExt};
use crate::args::{CompileCommand, DiagnosticFormat, Input, Output, OutputFormat};
use crate::args::{
CompileCommand, DiagnosticFormat, Input, Output, OutputFormat, PageRangeArgument,
};
use crate::timings::Timer;
use crate::watch::Status;
use crate::world::SystemWorld;
@ -60,6 +62,17 @@ impl CompileCommand {
OutputFormat::Pdf
})
}
/// The page ranges the user asked to export via the `pages` argument.
///
/// Returns `None` when no `pages` argument was given, meaning all pages
/// should be exported.
pub fn exported_page_ranges(&self) -> Option<PageRanges> {
    let arguments = self.pages.as_ref()?;
    let ranges = arguments.iter().map(|argument| argument.to_range()).collect();
    Some(PageRanges::new(ranges))
}
}
/// Execute a compilation command.
@ -171,7 +184,8 @@ fn export_pdf(document: &Document, command: &CompileCommand) -> StrResult<()> {
let timestamp = convert_datetime(
command.common.creation_timestamp.unwrap_or_else(chrono::Utc::now),
);
let buffer = typst_pdf::pdf(document, Smart::Auto, timestamp);
let exported_page_ranges = command.exported_page_ranges();
let buffer = typst_pdf::pdf(document, Smart::Auto, timestamp, exported_page_ranges);
command
.output()
.write(&buffer)
@ -214,7 +228,21 @@ fn export_image(
output_template::has_indexable_template(output.to_str().unwrap_or_default())
}
};
if !can_handle_multiple && document.pages.len() > 1 {
let exported_page_ranges = command.exported_page_ranges();
let exported_pages = document
.pages
.iter()
.enumerate()
.filter(|(i, _)| {
exported_page_ranges.as_ref().map_or(true, |exported_page_ranges| {
exported_page_ranges.includes_page_index(*i)
})
})
.collect::<Vec<_>>();
if !can_handle_multiple && exported_pages.len() > 1 {
let err = match output {
Output::Stdout => "to stdout",
Output::Path(_) => {
@ -227,10 +255,8 @@ fn export_image(
let cache = world.export_cache();
// The results are collected in a `Vec<()>` which does not allocate.
document
.pages
exported_pages
.par_iter()
.enumerate()
.map(|(i, page)| {
// Use output with converted path.
let output = match output {
@ -250,7 +276,7 @@ fn export_image(
// If we are not watching, don't use the cache.
// If the frame is in the cache, skip it.
// If the file does not exist, always create it.
if watching && cache.is_cached(i, &page.frame) && path.exists() {
if watching && cache.is_cached(*i, &page.frame) && path.exists() {
return Ok(());
}

View File

@ -21,7 +21,7 @@ use pdf_writer::writers::Destination;
use pdf_writer::{Finish, Name, Pdf, Rect, Ref, Str, TextStr};
use typst::foundations::{Datetime, Label, NativeElement, Smart};
use typst::introspection::Location;
use typst::layout::{Abs, Dir, Em, Frame, Transform};
use typst::layout::{Abs, Dir, Em, Frame, PageRanges, Transform};
use typst::model::{Document, HeadingElem};
use typst::text::color::frame_for_glyph;
use typst::text::{Font, Lang};
@ -55,13 +55,17 @@ use crate::pattern::PdfPattern;
/// The `timestamp`, if given, is expected to be the creation date of the
/// document as a UTC datetime. It will only be used if `set document(date: ..)`
/// is `auto`.
///
/// The `page_ranges` option specifies which ranges of pages should be exported
/// in the PDF. When `None`, all pages should be exported.
#[typst_macros::time(name = "pdf")]
pub fn pdf(
document: &Document,
ident: Smart<&str>,
timestamp: Option<Datetime>,
page_ranges: Option<PageRanges>,
) -> Vec<u8> {
let mut ctx = PdfContext::new(document);
let mut ctx = PdfContext::new(document, page_ranges);
page::construct_pages(&mut ctx, &document.pages);
font::write_fonts(&mut ctx);
image::write_images(&mut ctx);
@ -82,7 +86,10 @@ struct PdfContext<'a> {
/// The writer we are writing the PDF into.
pdf: Pdf,
/// Content of exported pages.
pages: Vec<EncodedPage>,
pages: Vec<Option<EncodedPage>>,
/// Page ranges to export.
/// When `None`, all pages are exported.
exported_pages: Option<PageRanges>,
/// For each font a mapping from used glyphs to their text representation.
/// May contain multiple chars in case of ligatures or similar things. The
/// same glyph can have a different text representation within one document,
@ -108,8 +115,6 @@ struct PdfContext<'a> {
/// dictionary), which Acrobat doesn't appreciate (it fails to parse the
/// font) even if the specification seems to allow it.
type3_font_resources_ref: Ref,
/// The IDs of written pages.
page_refs: Vec<Ref>,
/// The IDs of written fonts.
font_refs: Vec<Ref>,
/// The IDs of written images.
@ -145,7 +150,7 @@ struct PdfContext<'a> {
}
impl<'a> PdfContext<'a> {
fn new(document: &'a Document) -> Self {
fn new(document: &'a Document, page_ranges: Option<PageRanges>) -> Self {
let mut alloc = Ref::new(1);
let page_tree_ref = alloc.bump();
let global_resources_ref = alloc.bump();
@ -154,13 +159,13 @@ impl<'a> PdfContext<'a> {
document,
pdf: Pdf::new(),
pages: vec![],
exported_pages: page_ranges,
glyph_sets: HashMap::new(),
languages: BTreeMap::new(),
alloc,
page_tree_ref,
global_resources_ref,
type3_font_resources_ref,
page_refs: vec![],
font_refs: vec![],
image_refs: vec![],
gradient_refs: vec![],
@ -251,7 +256,8 @@ fn write_catalog(ctx: &mut PdfContext, ident: Smart<&str>, timestamp: Option<Dat
}
info.finish();
xmp.num_pages(ctx.document.pages.len() as u32);
// Only count exported pages.
xmp.num_pages(ctx.pages.iter().filter(|page| page.is_some()).count() as u32);
xmp.format("application/pdf");
xmp.language(ctx.languages.keys().map(|lang| LangId(lang.as_str())));
@ -350,7 +356,8 @@ fn write_named_destinations(ctx: &mut PdfContext) {
let index = pos.page.get() - 1;
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());
if let Some(page) = ctx.pages.get(index) {
// If the heading's page exists and is exported, include it.
if let Some(Some(page)) = ctx.pages.get(index) {
let dest_ref = ctx.alloc.bump();
let x = pos.point.x.to_f32();
let y = (page.size.y - y).to_f32();

View File

@ -18,7 +18,17 @@ pub(crate) fn write_outline(ctx: &mut PdfContext) -> Option<Ref> {
// enforced in the manner shown below.
let mut last_skipped_level = None;
let elements = ctx.document.introspector.query(&HeadingElem::elem().select());
for elem in elements.iter() {
if let Some(page_ranges) = &ctx.exported_pages {
if !page_ranges
.includes_page(ctx.document.introspector.page(elem.location().unwrap()))
{
// Don't bookmark headings in non-exported pages
continue;
}
}
let heading = elem.to_packed::<HeadingElem>().unwrap();
let leaf = HeadingNode::leaf(heading);
@ -166,9 +176,11 @@ fn write_outline_item(
let loc = node.element.location().unwrap();
let pos = ctx.document.introspector.position(loc);
let index = pos.page.get() - 1;
if let Some(page) = ctx.pages.get(index) {
// Don't link to non-exported pages.
if let Some(Some(page)) = ctx.pages.get(index) {
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());
outline.dest().page(ctx.page_refs[index]).xyz(
outline.dest().page(page.id).xyz(
pos.point.x.to_f32(),
(page.size.y - y).to_f32(),
None,

View File

@ -27,20 +27,40 @@ use typst::visualize::{
/// Construct page objects.
#[typst_macros::time(name = "construct pages")]
pub(crate) fn construct_pages(ctx: &mut PdfContext, pages: &[Page]) {
for page in pages {
let (page_ref, mut encoded) = construct_page(ctx, &page.frame);
encoded.label = page
.numbering
let mut skipped_pages = 0;
for (i, page) in pages.iter().enumerate() {
if ctx
.exported_pages
.as_ref()
.and_then(|num| PdfPageLabel::generate(num, page.number));
ctx.page_refs.push(page_ref);
ctx.pages.push(encoded);
.is_some_and(|ranges| !ranges.includes_page_index(i))
{
// Don't export this page.
ctx.pages.push(None);
skipped_pages += 1;
} else {
let mut encoded = construct_page(ctx, &page.frame);
encoded.label = page
.numbering
.as_ref()
.and_then(|num| PdfPageLabel::generate(num, page.number))
.or_else(|| {
// When some pages were ignored from export, we show a page label with
// the correct real (not logical) page number.
// This is for consistency with normal output when pages have no numbering
// and all are exported: the final PDF page numbers always correspond to
// the real (not logical) page numbers. Here, the final PDF page number
// will differ, but we can at least use labels to indicate what was
// the corresponding real page number in the Typst document.
(skipped_pages > 0).then(|| PdfPageLabel::arabic(i + 1))
});
ctx.pages.push(Some(encoded));
}
}
}
/// Construct a page object.
#[typst_macros::time(name = "construct page")]
pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, EncodedPage) {
pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> EncodedPage {
let page_ref = ctx.alloc.bump();
let size = frame.size();
@ -60,7 +80,7 @@ pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, Encod
// Encode the page into the content stream.
write_frame(&mut ctx, frame);
let page = EncodedPage {
EncodedPage {
size,
content: deflate_deferred(ctx.content.finish()),
id: page_ref,
@ -68,21 +88,20 @@ pub(crate) fn construct_page(ctx: &mut PdfContext, frame: &Frame) -> (Ref, Encod
links: ctx.links,
label: None,
resources: ctx.resources,
};
(page_ref, page)
}
}
/// Write the page tree.
pub(crate) fn write_page_tree(ctx: &mut PdfContext) {
let mut refs = vec![];
for i in 0..ctx.pages.len() {
write_page(ctx, i);
write_page(ctx, i, &mut refs);
}
ctx.pdf
.pages(ctx.page_tree_ref)
.count(ctx.page_refs.len() as i32)
.kids(ctx.page_refs.iter().copied());
.count(refs.len() as i32)
.kids(refs.iter().copied());
}
/// Write the global resource dictionary that will be referenced by all pages.
@ -170,10 +189,15 @@ pub(crate) fn write_global_resources(ctx: &mut PdfContext) {
}
/// Write a page tree node.
fn write_page(ctx: &mut PdfContext, i: usize) {
let page = &ctx.pages[i];
fn write_page(ctx: &mut PdfContext, i: usize, refs: &mut Vec<Ref>) {
let Some(page) = &ctx.pages[i] else {
// Page excluded from export.
return;
};
let content_id = ctx.alloc.bump();
refs.push(page.id);
let mut page_writer = ctx.pdf.page(page.id);
page_writer.parent(ctx.page_tree_ref);
@ -225,7 +249,8 @@ fn write_page(ctx: &mut PdfContext, i: usize) {
let index = pos.page.get() - 1;
let y = (pos.point.y - Abs::pt(10.0)).max(Abs::zero());
if let Some(page) = ctx.pages.get(index) {
// Don't add links to non-exported pages.
if let Some(Some(page)) = ctx.pages.get(index) {
annotation
.action()
.action_type(ActionType::GoTo)
@ -244,9 +269,12 @@ fn write_page(ctx: &mut PdfContext, i: usize) {
}
/// Write the page labels.
/// They are numbered according to the page's final number, considering pages
/// which were removed from export, and not according to the page's real or
/// logical number in the initial Typst document.
pub(crate) fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref)> {
// If there is no page labeled, we skip the writing
if !ctx.pages.iter().any(|p| {
// If there is no exported page labeled, we skip the writing
if !ctx.pages.iter().filter_map(Option::as_ref).any(|p| {
p.label
.as_ref()
.is_some_and(|l| l.prefix.is_some() || l.style.is_some())
@ -258,7 +286,8 @@ pub(crate) fn write_page_labels(ctx: &mut PdfContext) -> Vec<(NonZeroUsize, Ref)
let empty_label = PdfPageLabel::default();
let mut prev: Option<&PdfPageLabel> = None;
for (i, page) in ctx.pages.iter().enumerate() {
// Skip non-exported pages for numbering.
for (i, page) in ctx.pages.iter().filter_map(Option::as_ref).enumerate() {
let nr = NonZeroUsize::new(1 + i).unwrap();
// If there are pages with empty labels between labeled pages, we must
// write empty PageLabel entries.
@ -372,6 +401,17 @@ impl PdfPageLabel {
let offset = style.and(NonZeroUsize::new(number));
Some(PdfPageLabel { prefix, style, offset })
}
/// Builds a prefix-less page label in arabic style whose offset is the
/// given page number.
/// For example, passing 11 yields a label displayed as `11`.
fn arabic(number: usize) -> PdfPageLabel {
    let style = Some(PdfPageLabelStyle::Arabic);
    let offset = NonZeroUsize::new(number);
    PdfPageLabel { prefix: None, style, offset }
}
}
/// Data for an exported page.

View File

@ -116,7 +116,7 @@ fn register_pattern(
};
// Render the body.
let (_, content) = construct_page(ctx.parent, pattern.frame());
let content = construct_page(ctx.parent, pattern.frame());
let mut pdf_pattern = PdfPattern {
transform,

View File

@ -1,5 +1,6 @@
use std::borrow::Cow;
use std::num::NonZeroUsize;
use std::ops::RangeInclusive;
use std::ptr;
use std::str::FromStr;
@ -726,6 +727,39 @@ cast! {
v: Func => Self::Func(v),
}
/// A list of page ranges to be exported. The ranges are one-indexed.
/// For example, `1..=3` indicates the first, second and third pages should be
/// exported.
#[derive(Debug, Clone)]
pub struct PageRanges(Vec<PageRange>);

/// A single inclusive, one-indexed page range. A `None` bound leaves the
/// range open on that side.
pub type PageRange = RangeInclusive<Option<NonZeroUsize>>;

impl PageRanges {
    /// Creates a list of page ranges from the given ranges.
    pub fn new(ranges: Vec<PageRange>) -> Self {
        Self(ranges)
    }

    /// Check if a page, given its number, should be included when exporting the
    /// document while restricting the exported pages to these page ranges.
    /// This is the one-indexed version of 'includes_page_index'.
    pub fn includes_page(&self, page: NonZeroUsize) -> bool {
        // `page` is non-zero, so the subtraction cannot underflow.
        self.includes_page_index(page.get() - 1)
    }

    /// Check if a page, given its index, should be included when exporting the
    /// document while restricting the exported pages to these page ranges.
    /// This is the zero-indexed version of 'includes_page'.
    ///
    /// The comparison is done on zero-based indices, so no index (not even
    /// `usize::MAX`) can overflow or panic.
    pub fn includes_page_index(&self, page: usize) -> bool {
        self.0.iter().any(|range| {
            let (start, end) = (*range.start(), *range.end());
            // Bounds are non-zero, so converting them to indices cannot
            // underflow. A missing bound places no restriction on that side.
            let at_or_after_start = start.map_or(true, |start| start.get() - 1 <= page);
            let at_or_before_end = end.map_or(true, |end| page <= end.get() - 1);
            at_or_after_start && at_or_before_end
        })
    }
}
/// A manual page break.
///
/// Must not be used inside any containers.

View File

@ -176,7 +176,7 @@ impl<'a> Runner<'a> {
// Write PDF if requested.
if crate::ARGS.pdf() {
let pdf_path = format!("{}/pdf/{}.pdf", crate::STORE_PATH, self.test.name);
let pdf = typst_pdf::pdf(document, Smart::Auto, None);
let pdf = typst_pdf::pdf(document, Smart::Auto, None, None);
std::fs::write(pdf_path, pdf).unwrap();
}