Generate PDF ID automatically unless we really have a stable ID (#3591)

This commit is contained in:
Laurenz 2024-03-09 12:55:03 +01:00 committed by GitHub
parent 204c4ecfcb
commit 82617a6a3c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 40 additions and 38 deletions

View File

@ -9,7 +9,7 @@ use parking_lot::RwLock;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator}; use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use typst::diag::{bail, At, Severity, SourceDiagnostic, StrResult}; use typst::diag::{bail, At, Severity, SourceDiagnostic, StrResult};
use typst::eval::Tracer; use typst::eval::Tracer;
use typst::foundations::Datetime; use typst::foundations::{Datetime, Smart};
use typst::layout::Frame; use typst::layout::Frame;
use typst::model::Document; use typst::model::Document;
use typst::syntax::{FileId, Source, Span}; use typst::syntax::{FileId, Source, Span};
@ -157,18 +157,13 @@ fn export(
OutputFormat::Svg => { OutputFormat::Svg => {
export_image(world, document, command, watching, ImageExportFormat::Svg) export_image(world, document, command, watching, ImageExportFormat::Svg)
} }
OutputFormat::Pdf => export_pdf(document, command, world), OutputFormat::Pdf => export_pdf(document, command),
} }
} }
/// Export to a PDF. /// Export to a PDF.
fn export_pdf( fn export_pdf(document: &Document, command: &CompileCommand) -> StrResult<()> {
document: &Document, let buffer = typst_pdf::pdf(document, Smart::Auto, now());
command: &CompileCommand,
world: &SystemWorld,
) -> StrResult<()> {
let ident = world.input().map(|i| i.to_string_lossy());
let buffer = typst_pdf::pdf(document, ident.as_deref(), now());
let output = command.output(); let output = command.output();
fs::write(output, buffer) fs::write(output, buffer)
.map_err(|err| eco_format!("failed to write PDF file ({err})"))?; .map_err(|err| eco_format!("failed to write PDF file ({err})"))?;

View File

@ -29,8 +29,6 @@ static STDIN_ID: Lazy<FileId> =
pub struct SystemWorld { pub struct SystemWorld {
/// The working directory. /// The working directory.
workdir: Option<PathBuf>, workdir: Option<PathBuf>,
/// The canonical path to the input file.
input: Option<PathBuf>,
/// The root relative to which absolute paths are resolved. /// The root relative to which absolute paths are resolved.
root: PathBuf, root: PathBuf,
/// The input path. /// The input path.
@ -108,7 +106,6 @@ impl SystemWorld {
Ok(Self { Ok(Self {
workdir: std::env::current_dir().ok(), workdir: std::env::current_dir().ok(),
input,
root, root,
main, main,
library: Prehashed::new(library), library: Prehashed::new(library),
@ -152,11 +149,6 @@ impl SystemWorld {
self.now.take(); self.now.take();
} }
/// Return the canonical path to the input file.
pub fn input(&self) -> Option<&PathBuf> {
self.input.as_ref()
}
/// Lookup a source file by id. /// Lookup a source file by id.
#[track_caller] #[track_caller]
pub fn lookup(&self, id: FileId) -> Source { pub fn lookup(&self, id: FileId) -> Source {

View File

@ -19,7 +19,7 @@ use ecow::{eco_format, EcoString};
use pdf_writer::types::Direction; use pdf_writer::types::Direction;
use pdf_writer::writers::Destination; use pdf_writer::writers::Destination;
use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr}; use pdf_writer::{Finish, Name, Pdf, Ref, Str, TextStr};
use typst::foundations::{Datetime, Label, NativeElement}; use typst::foundations::{Datetime, Label, NativeElement, Smart};
use typst::introspection::Location; use typst::introspection::Location;
use typst::layout::{Abs, Dir, Em, Transform}; use typst::layout::{Abs, Dir, Em, Transform};
use typst::model::{Document, HeadingElem}; use typst::model::{Document, HeadingElem};
@ -39,12 +39,17 @@ use crate::pattern::PdfPattern;
/// ///
/// Returns the raw bytes making up the PDF file. /// Returns the raw bytes making up the PDF file.
/// ///
/// The `ident` parameter shall be a string that uniquely and stably identifies /// The `ident` parameter, if given, shall be a string that uniquely and stably
/// the document. It should not change between compilations of the same /// identifies the document. It should not change between compilations of the
/// document. Its hash will be used to create a PDF document identifier (the /// same document. **If you cannot provide such a stable identifier, just pass
/// identifier itself is not leaked). If `ident` is `None`, a hash of the /// `Smart::Auto` rather than trying to come up with one.** The CLI, for
/// document is used instead (which means that it _will_ change across /// example, does not have a well-defined notion of a long-lived project and as
/// compilations). /// such just passes `Smart::Auto`.
///
/// If an `ident` is given, the hash of it will be used to create a PDF document
/// identifier (the identifier itself is not leaked). If `ident` is `Auto`, a
/// hash of the document's title and author is used when both are set (which is
/// reasonably unique and stable); otherwise a hash of the PDF data is used.
/// ///
/// The `timestamp`, if given, is expected to be the creation date of the /// The `timestamp`, if given, is expected to be the creation date of the
/// document as a UTC datetime. It will only be used if `set document(date: ..)` /// document as a UTC datetime. It will only be used if `set document(date: ..)`
@ -52,7 +57,7 @@ use crate::pattern::PdfPattern;
#[typst_macros::time(name = "pdf")] #[typst_macros::time(name = "pdf")]
pub fn pdf( pub fn pdf(
document: &Document, document: &Document,
ident: Option<&str>, ident: Smart<&str>,
timestamp: Option<Datetime>, timestamp: Option<Datetime>,
) -> Vec<u8> { ) -> Vec<u8> {
let mut ctx = PdfContext::new(document); let mut ctx = PdfContext::new(document);
@ -158,7 +163,7 @@ impl<'a> PdfContext<'a> {
} }
/// Write the document catalog. /// Write the document catalog.
fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Datetime>) { fn write_catalog(ctx: &mut PdfContext, ident: Smart<&str>, timestamp: Option<Datetime>) {
let lang = ctx.languages.iter().max_by_key(|(_, &count)| count).map(|(&l, _)| l); let lang = ctx.languages.iter().max_by_key(|(_, &count)| count).map(|(&l, _)| l);
let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { let dir = if lang.map(Lang::dir) == Some(Dir::RTL) {
@ -236,18 +241,25 @@ fn write_catalog(ctx: &mut PdfContext, ident: Option<&str>, timestamp: Option<Da
// changes in the frames. // changes in the frames.
let instance_id = hash_base64(&ctx.pdf.as_bytes()); let instance_id = hash_base64(&ctx.pdf.as_bytes());
if let Some(ident) = ident { // Determine the document's ID. It should be as stable as possible.
// A unique ID for the document that stays stable across compilations. const PDF_VERSION: &str = "PDF-1.7";
let doc_id = hash_base64(&("PDF-1.7", ident)); let doc_id = if let Smart::Custom(ident) = ident {
xmp.document_id(&doc_id); // We were provided with a stable ID. Yay!
xmp.instance_id(&instance_id); hash_base64(&(PDF_VERSION, ident))
ctx.pdf } else if ctx.document.title.is_some() && !ctx.document.author.is_empty() {
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes())); // If not provided from the outside, but title and author were given, we
// compute a hash of them, which should be reasonably stable and unique.
hash_base64(&(PDF_VERSION, &ctx.document.title, &ctx.document.author))
} else { } else {
// This is not spec-compliant, but some PDF readers really want an ID. // The user provided no usable metadata which we can use as an `/ID`.
let bytes = instance_id.into_bytes(); instance_id.clone()
ctx.pdf.set_file_id((bytes.clone(), bytes)); };
}
// Write IDs.
xmp.document_id(&doc_id);
xmp.instance_id(&instance_id);
ctx.pdf
.set_file_id((doc_id.clone().into_bytes(), instance_id.into_bytes()));
xmp.rendition_class(RenditionClass::Proof); xmp.rendition_class(RenditionClass::Proof);
xmp.pdf_version("1.7"); xmp.pdf_version("1.7");

View File

@ -51,6 +51,9 @@ pub struct DocumentElem {
/// ///
/// The year component must be at least zero in order to be embedded into a /// The year component must be at least zero in order to be embedded into a
/// PDF. /// PDF.
///
/// If you want to create byte-by-byte reproducible PDFs, set this to
/// something other than `{auto}`.
#[ghost] #[ghost]
pub date: Smart<Option<Datetime>>, pub date: Smart<Option<Datetime>>,

View File

@ -501,7 +501,7 @@ fn test(
if let Some(pdf_path) = pdf_path { if let Some(pdf_path) = pdf_path {
let pdf_data = typst_pdf::pdf( let pdf_data = typst_pdf::pdf(
&document, &document,
Some(&format!("typst-test: {}", name.display())), Smart::Custom(&format!("typst-test: {}", name.display())),
world.today(Some(0)), world.today(Some(0)),
); );
fs::create_dir_all(pdf_path.parent().unwrap()).unwrap(); fs::create_dir_all(pdf_path.parent().unwrap()).unwrap();