From 84bd3454df487500d8a084190b4a10eac2a9f0f1 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Wed, 11 May 2022 15:19:03 +0200 Subject: [PATCH] Write language and direction for PDFs --- src/export/pdf.rs | 45 +++++++++++++++++++++++++++++++++---- src/frame.rs | 3 +++ src/library/text/lang.rs | 2 +- src/library/text/shaping.rs | 8 ++++++- 4 files changed, 52 insertions(+), 6 deletions(-) diff --git a/src/export/pdf.rs b/src/export/pdf.rs index 830e5bbbc..7fda504eb 100644 --- a/src/export/pdf.rs +++ b/src/export/pdf.rs @@ -7,8 +7,8 @@ use std::sync::Arc; use image::{DynamicImage, GenericImageView, ImageFormat, ImageResult, Rgba}; use pdf_writer::types::{ - ActionType, AnnotationType, CidFontType, ColorSpaceOperand, FontFlags, SystemInfo, - UnicodeCmap, + ActionType, AnnotationType, CidFontType, ColorSpaceOperand, Direction, FontFlags, + SystemInfo, UnicodeCmap, }; use pdf_writer::writers::ColorSpace; use pdf_writer::{Content, Filter, Finish, Name, PdfWriter, Rect, Ref, Str, TextStr}; @@ -18,10 +18,11 @@ use super::subset::subset; use crate::font::{find_name, FaceId, FontStore}; use crate::frame::{Element, Frame, Group, Text}; use crate::geom::{ - self, Color, Em, Geometry, Length, Numeric, Paint, Point, Shape, Size, Stroke, + self, Color, Dir, Em, Geometry, Length, Numeric, Paint, Point, Shape, Size, Stroke, Transform, }; use crate::image::{Image, ImageId, ImageStore, RasterImage}; +use crate::library::text::Lang; use crate::Context; /// Export a collection of frames into a PDF file. @@ -303,6 +304,7 @@ impl<'a> PdfExporter<'a> { // The page objects (non-root nodes in the page tree). let mut page_refs = vec![]; + let mut languages = HashMap::new(); for page in self.pages { let page_id = self.alloc.bump(); let content_id = self.alloc.bump(); @@ -330,6 +332,13 @@ impl<'a> PdfExporter<'a> { annotations.finish(); page_writer.finish(); + for (lang, count) in page.languages { + languages + .entry(lang) + .and_modify(|x| *x += count) + .or_insert_with(|| count); + } + self.writer .stream(content_id, &deflate(&page.content.finish())) .filter(Filter::FlateDecode); @@ -359,9 +368,28 @@ impl<'a> PdfExporter<'a> { resources.finish(); pages.finish(); + let lang = languages + .into_iter() + .max_by(|(_, v1), (_, v2)| v1.cmp(v2)) + .map(|(k, _)| k); + + let dir = if lang.map(Lang::dir) == Some(Dir::RTL) { + Direction::R2L + } else { + Direction::L2R + }; + // Write the document information, catalog and wrap it up! self.writer.document_info(self.alloc.bump()).creator(TextStr("Typst")); - self.writer.catalog(self.alloc.bump()).pages(page_tree_ref); + let mut catalog = self.writer.catalog(self.alloc.bump()); + catalog.pages(page_tree_ref); + catalog.viewer_preferences().direction(dir); + + if let Some(lang) = lang { + catalog.lang(TextStr(lang.as_str())); + } + + catalog.finish(); self.writer.finish() } } @@ -372,6 +400,7 @@ struct PageExporter<'a> { font_map: &'a mut Remapper, image_map: &'a mut Remapper, glyphs: &'a mut HashMap>, + languages: HashMap, bottom: f32, content: Content, links: Vec<(String, Rect)>, @@ -384,6 +413,7 @@ struct Page { size: Size, content: Content, links: Vec<(String, Rect)>, + languages: HashMap, } /// A simulated graphics state used to deduplicate graphics state changes and @@ -403,6 +433,7 @@ impl<'a> PageExporter<'a> { font_map: &mut exporter.face_map, image_map: &mut exporter.image_map, glyphs: &mut exporter.glyph_sets, + languages: HashMap::new(), bottom: 0.0, content: Content::new(), links: vec![], @@ -422,6 +453,7 @@ impl<'a> PageExporter<'a> { size: frame.size, content: self.content, links: self.links, + languages: self.languages, } } @@ -508,6 +540,11 @@ impl<'a> PageExporter<'a> { items.show(Str(&encoded)); } + self.languages + .entry(text.lang) + .and_modify(|x| *x += text.glyphs.len()) + .or_insert_with(|| text.glyphs.len()); + items.finish(); positioned.finish(); self.content.end_text(); diff --git a/src/frame.rs b/src/frame.rs index 80e25f3b4..04551e6e2 100644 --- a/src/frame.rs +++ b/src/frame.rs @@ -8,6 +8,7 @@ use crate::geom::{ Align, Em, Length, Numeric, Paint, Point, Shape, Size, Spec, Transform, }; use crate::image::ImageId; +use crate::library::text::Lang; use crate::util::{EcoString, MaybeShared}; /// A finished layout with elements at fixed positions. @@ -269,6 +270,8 @@ pub struct Text { pub size: Length, /// Glyph color. pub fill: Paint, + /// The natural language of the text. + pub lang: Lang, /// The glyphs. pub glyphs: Vec, } diff --git a/src/library/text/lang.rs b/src/library/text/lang.rs index 360827fac..b75b3cd82 100644 --- a/src/library/text/lang.rs +++ b/src/library/text/lang.rs @@ -28,7 +28,7 @@ impl Lang { } /// The default direction for the language. - pub fn dir(&self) -> Dir { + pub fn dir(self) -> Dir { match self.as_str() { "ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur" | "yi" => Dir::RTL, diff --git a/src/library/text/shaping.rs b/src/library/text/shaping.rs index 80f1b17df..66a9f7c26 100644 --- a/src/library/text/shaping.rs +++ b/src/library/text/shaping.rs @@ -104,7 +104,13 @@ impl<'a> ShapedText<'a> { }) .collect(); - let text = Text { face_id, size: self.size, fill, glyphs }; + let text = Text { + face_id, + size: self.size, + lang: self.styles.get(TextNode::LANG), + fill, + glyphs, + }; let text_layer = frame.layer(); let width = text.width();