mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Support for defining which charset should be covered by a font (#5305)
Co-authored-by: Laurenz <laurmaedje@gmail.com>
This commit is contained in:
parent
54cee16c31
commit
73253d4651
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2931,6 +2931,7 @@ dependencies = [
|
||||
"qcms",
|
||||
"rayon",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"roxmltree",
|
||||
"rust_decimal",
|
||||
"rustybuzz",
|
||||
|
@ -94,6 +94,7 @@ qcms = "0.3.0"
|
||||
quote = "1"
|
||||
rayon = "1.7.0"
|
||||
regex = "1"
|
||||
regex-syntax = "0.8"
|
||||
resvg = { version = "0.43", default-features = false, features = ["raster-images"] }
|
||||
roxmltree = "0.20"
|
||||
rust_decimal = { version = "1.36.0", default-features = false, features = ["maths"] }
|
||||
|
@ -54,7 +54,7 @@ pub fn layout_image(
|
||||
format,
|
||||
elem.alt(styles),
|
||||
engine.world,
|
||||
&families(styles).collect::<Vec<_>>(),
|
||||
&families(styles).map(|f| f.as_str()).collect::<Vec<_>>(),
|
||||
elem.flatten_text(styles),
|
||||
)
|
||||
.at(span)?;
|
||||
|
@ -11,8 +11,8 @@ use typst_library::engine::Engine;
|
||||
use typst_library::foundations::{Smart, StyleChain};
|
||||
use typst_library::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
|
||||
use typst_library::text::{
|
||||
families, features, is_default_ignorable, variant, Font, FontVariant, Glyph, Lang,
|
||||
Region, TextEdgeBounds, TextElem, TextItem,
|
||||
families, features, is_default_ignorable, variant, Font, FontFamily, FontVariant,
|
||||
Glyph, Lang, Region, TextEdgeBounds, TextElem, TextItem,
|
||||
};
|
||||
use typst_library::World;
|
||||
use typst_utils::SliceExt;
|
||||
@ -351,7 +351,7 @@ impl<'a> ShapedText<'a> {
|
||||
for family in families(self.styles) {
|
||||
if let Some(font) = world
|
||||
.book()
|
||||
.select(family, self.variant)
|
||||
.select(family.as_str(), self.variant)
|
||||
.and_then(|id| world.font(id))
|
||||
{
|
||||
expand(&font, TextEdgeBounds::Zero);
|
||||
@ -463,7 +463,8 @@ impl<'a> ShapedText<'a> {
|
||||
None
|
||||
};
|
||||
let mut chain = families(self.styles)
|
||||
.map(|family| book.select(family, self.variant))
|
||||
.filter(|family| family.covers().map_or(true, |c| c.is_match("-")))
|
||||
.map(|family| book.select(family.as_str(), self.variant))
|
||||
.chain(fallback_func.iter().map(|f| f()))
|
||||
.flatten();
|
||||
|
||||
@ -719,7 +720,7 @@ fn shape_segment<'a>(
|
||||
ctx: &mut ShapingContext,
|
||||
base: usize,
|
||||
text: &str,
|
||||
mut families: impl Iterator<Item = &'a str> + Clone,
|
||||
mut families: impl Iterator<Item = &'a FontFamily> + Clone,
|
||||
) {
|
||||
// Don't try shaping newlines, tabs, or default ignorables.
|
||||
if text
|
||||
@ -732,11 +733,18 @@ fn shape_segment<'a>(
|
||||
// Find the next available family.
|
||||
let world = ctx.engine.world;
|
||||
let book = world.book();
|
||||
let mut selection = families.find_map(|family| {
|
||||
book.select(family, ctx.variant)
|
||||
let mut selection = None;
|
||||
let mut covers = None;
|
||||
for family in families.by_ref() {
|
||||
selection = book
|
||||
.select(family.as_str(), ctx.variant)
|
||||
.and_then(|id| world.font(id))
|
||||
.filter(|font| !ctx.used.contains(font))
|
||||
});
|
||||
.filter(|font| !ctx.used.contains(font));
|
||||
if selection.is_some() {
|
||||
covers = family.covers();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Do font fallback if the families are exhausted and fallback is enabled.
|
||||
if selection.is_none() && ctx.fallback {
|
||||
@ -795,6 +803,16 @@ fn shape_segment<'a>(
|
||||
let pos = buffer.glyph_positions();
|
||||
let ltr = ctx.dir.is_positive();
|
||||
|
||||
// Whether the character at the given offset is covered by the coverage.
|
||||
let is_covered = |offset| {
|
||||
let end = text[offset..]
|
||||
.char_indices()
|
||||
.nth(1)
|
||||
.map(|(i, _)| offset + i)
|
||||
.unwrap_or(text.len());
|
||||
covers.map_or(true, |cov| cov.is_match(&text[offset..end]))
|
||||
};
|
||||
|
||||
// Collect the shaped glyphs, doing fallback and shaping parts again with
|
||||
// the next font if necessary.
|
||||
let mut i = 0;
|
||||
@ -803,7 +821,7 @@ fn shape_segment<'a>(
|
||||
let cluster = info.cluster as usize;
|
||||
|
||||
// Add the glyph to the shaped output.
|
||||
if info.glyph_id != 0 {
|
||||
if info.glyph_id != 0 && is_covered(cluster) {
|
||||
// Determine the text range of the glyph.
|
||||
let start = base + cluster;
|
||||
let end = base
|
||||
@ -836,7 +854,9 @@ fn shape_segment<'a>(
|
||||
} else {
|
||||
// First, search for the end of the tofu sequence.
|
||||
let k = i;
|
||||
while infos.get(i + 1).is_some_and(|info| info.glyph_id == 0) {
|
||||
while infos.get(i + 1).is_some_and(|info| {
|
||||
info.glyph_id == 0 || !is_covered(info.cluster as usize)
|
||||
}) {
|
||||
i += 1;
|
||||
}
|
||||
|
||||
|
@ -237,7 +237,7 @@ fn find_math_font(
|
||||
let variant = variant(styles);
|
||||
let world = engine.world;
|
||||
let Some(font) = families(styles).find_map(|family| {
|
||||
let id = world.book().select(family, variant)?;
|
||||
let id = world.book().select(family.as_str(), variant)?;
|
||||
let font = world.font(id)?;
|
||||
let _ = font.ttf().tables().math?.constants?;
|
||||
Some(font)
|
||||
|
@ -44,6 +44,7 @@ png = { workspace = true }
|
||||
qcms = { workspace = true }
|
||||
rayon = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
regex-syntax = { workspace = true }
|
||||
roxmltree = { workspace = true }
|
||||
rust_decimal = { workspace = true }
|
||||
rustybuzz = { workspace = true }
|
||||
|
@ -29,6 +29,7 @@ pub use self::smartquote::*;
|
||||
pub use self::space::*;
|
||||
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::hash::Hash;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use ecow::{eco_format, EcoString};
|
||||
@ -39,13 +40,14 @@ use rustybuzz::Feature;
|
||||
use smallvec::SmallVec;
|
||||
use ttf_parser::Tag;
|
||||
use typst_syntax::Spanned;
|
||||
use typst_utils::singleton;
|
||||
|
||||
use crate::diag::{bail, warning, HintedStrResult, SourceResult};
|
||||
use crate::engine::Engine;
|
||||
use crate::foundations::{
|
||||
cast, category, dict, elem, Args, Array, Cast, Category, Construct, Content, Dict,
|
||||
Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Repr, Resolve,
|
||||
Scope, Set, Smart, StyleChain,
|
||||
Fold, IntoValue, NativeElement, Never, NoneValue, Packed, PlainText, Regex, Repr,
|
||||
Resolve, Scope, Set, Smart, StyleChain,
|
||||
};
|
||||
use crate::layout::{Abs, Axis, Dir, Em, Length, Ratio, Rel};
|
||||
use crate::model::ParElem;
|
||||
@ -94,7 +96,21 @@ pub(super) fn define(global: &mut Scope) {
|
||||
/// ```
|
||||
#[elem(Debug, Construct, PlainText, Repr)]
|
||||
pub struct TextElem {
|
||||
/// A font family name or priority list of font family names.
|
||||
/// A font family descriptor or priority list of font family descriptors.
|
||||
///
|
||||
/// A font family descriptor can be a plain string representing the family
|
||||
/// name or a dictionary with the following keys:
|
||||
///
|
||||
/// - `name` (required): The font family name.
|
||||
/// - `covers` (optional): Defines the Unicode codepoints for which the
|
||||
/// family shall be used. This can be:
|
||||
/// - A predefined coverage set:
|
||||
/// - `{"latin-in-cjk"}` covers all codepoints except for those which
|
||||
/// exist in Latin fonts, but should preferably be taken from CJK
|
||||
/// fonts.
|
||||
/// - A [regular expression]($regex) that defines exactly which codepoints
|
||||
/// shall be covered. Accepts only the subset of regular expressions
|
||||
/// which consist of exactly one dot, letter, or character class.
|
||||
///
|
||||
/// When processing text, Typst tries all specified font families in order
|
||||
/// until it finds a font that has the necessary glyphs. In the example
|
||||
@ -129,6 +145,21 @@ pub struct TextElem {
|
||||
///
|
||||
/// This is Latin. \
|
||||
/// هذا عربي.
|
||||
///
|
||||
/// // Change font only for numbers.
|
||||
/// #set text(font: (
|
||||
/// (name: "PT Sans", covers: regex("[0-9]")),
|
||||
/// "Libertinus Serif"
|
||||
/// ))
|
||||
///
|
||||
/// The number 123.
|
||||
///
|
||||
/// // Mix Latin and CJK fonts.
|
||||
/// #set text(font: (
|
||||
/// (name: "Inria Serif", covers: "latin-in-cjk"),
|
||||
/// "Noto Serif CJK SC"
|
||||
/// ))
|
||||
/// 分别设置“中文”和English字体
|
||||
/// ```
|
||||
#[parse({
|
||||
let font_list: Option<Spanned<FontList>> = args.named("font")?;
|
||||
@ -766,35 +797,107 @@ impl PlainText for Packed<TextElem> {
|
||||
}
|
||||
|
||||
/// A lowercased font family like "arial".
|
||||
#[derive(Clone, Eq, PartialEq, Hash)]
|
||||
pub struct FontFamily(EcoString);
|
||||
#[derive(Debug, Clone, PartialEq, Hash)]
|
||||
pub struct FontFamily {
|
||||
// The name of the font family
|
||||
name: EcoString,
|
||||
// A regex that defines the Unicode codepoints supported by the font.
|
||||
covers: Option<Covers>,
|
||||
}
|
||||
|
||||
impl FontFamily {
|
||||
/// Create a named font family variant.
|
||||
pub fn new(string: &str) -> Self {
|
||||
Self(string.to_lowercase().into())
|
||||
Self::with_coverage(string, None)
|
||||
}
|
||||
|
||||
/// Create a font family by name and optional Unicode coverage.
|
||||
pub fn with_coverage(string: &str, covers: Option<Covers>) -> Self {
|
||||
Self { name: string.to_lowercase().into(), covers }
|
||||
}
|
||||
|
||||
/// The lowercased family name.
|
||||
pub fn as_str(&self) -> &str {
|
||||
&self.0
|
||||
}
|
||||
&self.name
|
||||
}
|
||||
|
||||
impl Debug for FontFamily {
|
||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
/// The user-set coverage of the font family.
|
||||
pub fn covers(&self) -> Option<&Regex> {
|
||||
self.covers.as_ref().map(|covers| covers.as_regex())
|
||||
}
|
||||
}
|
||||
|
||||
cast! {
|
||||
FontFamily,
|
||||
self => self.0.into_value(),
|
||||
self => self.name.into_value(),
|
||||
string: EcoString => Self::new(&string),
|
||||
mut v: Dict => {
|
||||
let ret = Self::with_coverage(
|
||||
&v.take("name")?.cast::<EcoString>()?,
|
||||
v.take("covers").ok().map(|v| v.cast()).transpose()?
|
||||
);
|
||||
v.finish(&["name", "covers"])?;
|
||||
ret
|
||||
},
|
||||
}
|
||||
|
||||
/// Defines which codepoints a font family will be used for.
|
||||
#[derive(Debug, Clone, PartialEq, Hash)]
|
||||
pub enum Covers {
|
||||
/// Covers all codepoints except those used both in Latin and CJK fonts.
|
||||
LatinInCjk,
|
||||
/// Covers the set of codepoints for which the regex matches.
|
||||
Regex(Regex),
|
||||
}
|
||||
|
||||
impl Covers {
|
||||
/// Retrieve the regex for the coverage.
|
||||
pub fn as_regex(&self) -> &Regex {
|
||||
match self {
|
||||
Self::LatinInCjk => singleton!(
|
||||
Regex,
|
||||
Regex::new(
|
||||
"[^\u{00B7}\u{2013}\u{2014}\u{2018}\u{2019}\
|
||||
\u{201C}\u{201D}\u{2025}-\u{2027}\u{2E3A}]"
|
||||
)
|
||||
.unwrap()
|
||||
),
|
||||
Self::Regex(regex) => regex,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cast! {
|
||||
Covers,
|
||||
self => match self {
|
||||
Self::LatinInCjk => "latin-in-cjk".into_value(),
|
||||
Self::Regex(regex) => regex.into_value(),
|
||||
},
|
||||
|
||||
/// Covers all codepoints except those used both in Latin and CJK fonts.
|
||||
"latin-in-cjk" => Covers::LatinInCjk,
|
||||
|
||||
regex: Regex => {
|
||||
let ast = regex_syntax::ast::parse::Parser::new().parse(regex.as_str());
|
||||
match ast {
|
||||
Ok(
|
||||
regex_syntax::ast::Ast::ClassBracketed(..)
|
||||
| regex_syntax::ast::Ast::ClassUnicode(..)
|
||||
| regex_syntax::ast::Ast::ClassPerl(..)
|
||||
| regex_syntax::ast::Ast::Dot(..)
|
||||
| regex_syntax::ast::Ast::Literal(..),
|
||||
) => {}
|
||||
_ => bail!(
|
||||
"coverage regex may only use dot, letters, and character classes";
|
||||
hint: "the regex is applied to each letter individually"
|
||||
),
|
||||
}
|
||||
Covers::Regex(regex)
|
||||
},
|
||||
}
|
||||
|
||||
/// Font family fallback list.
|
||||
#[derive(Debug, Default, Clone, Eq, PartialEq, Hash)]
|
||||
#[derive(Debug, Default, Clone, PartialEq, Hash)]
|
||||
pub struct FontList(pub Vec<FontFamily>);
|
||||
|
||||
impl<'a> IntoIterator for &'a FontList {
|
||||
@ -809,7 +912,7 @@ impl<'a> IntoIterator for &'a FontList {
|
||||
cast! {
|
||||
FontList,
|
||||
self => if self.0.len() == 1 {
|
||||
self.0.into_iter().next().unwrap().0.into_value()
|
||||
self.0.into_iter().next().unwrap().name.into_value()
|
||||
} else {
|
||||
self.0.into_value()
|
||||
},
|
||||
@ -818,20 +921,22 @@ cast! {
|
||||
}
|
||||
|
||||
/// Resolve a prioritized iterator over the font families.
|
||||
pub fn families(styles: StyleChain) -> impl Iterator<Item = &str> + Clone {
|
||||
const FALLBACKS: &[&str] = &[
|
||||
pub fn families(styles: StyleChain) -> impl Iterator<Item = &FontFamily> + Clone {
|
||||
let fallbacks = singleton!(Vec<FontFamily>, {
|
||||
[
|
||||
"libertinus serif",
|
||||
"twitter color emoji",
|
||||
"noto color emoji",
|
||||
"apple color emoji",
|
||||
"segoe ui emoji",
|
||||
];
|
||||
|
||||
let tail = if TextElem::fallback_in(styles) { FALLBACKS } else { &[] };
|
||||
TextElem::font_in(styles)
|
||||
]
|
||||
.into_iter()
|
||||
.map(|family| family.as_str())
|
||||
.chain(tail.iter().copied())
|
||||
.map(FontFamily::new)
|
||||
.collect()
|
||||
});
|
||||
|
||||
let tail = if TextElem::fallback_in(styles) { fallbacks.as_slice() } else { &[] };
|
||||
TextElem::font_in(styles).into_iter().chain(tail.iter())
|
||||
}
|
||||
|
||||
/// Resolve the font variant.
|
||||
|
@ -157,7 +157,11 @@ fn is_shapable(engine: &Engine, text: &str, styles: StyleChain) -> bool {
|
||||
.select(family.as_str(), variant(styles))
|
||||
.and_then(|id| world.font(id))
|
||||
{
|
||||
return text.chars().all(|c| font.ttf().glyph_index(c).is_some());
|
||||
let covers = family.covers();
|
||||
return text.chars().all(|c| {
|
||||
covers.map_or(true, |cov| cov.is_match(c.encode_utf8(&mut [0; 4])))
|
||||
&& font.ttf().glyph_index(c).is_some()
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
BIN
tests/ref/text-font-covers-chinese.png
Normal file
BIN
tests/ref/text-font-covers-chinese.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.6 KiB |
BIN
tests/ref/text-font-covers-numbers.png
Normal file
BIN
tests/ref/text-font-covers-numbers.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 500 B |
@ -112,3 +112,40 @@ I
|
||||
[ ]
|
||||
text(fill: t, "Hello")
|
||||
})
|
||||
|
||||
--- text-font-types ---
|
||||
#let ubuntu = (name: "Ubuntu", covers: regex("[\u{20}-\u{FFFF}]"))
|
||||
#set text(font: ubuntu)
|
||||
#set text(font: (ubuntu, "Ubuntu"))
|
||||
|
||||
--- text-font-covers-chinese ---
|
||||
// Without ranges, the quotation mark is using the Latin font.
|
||||
#set text(font: ("Ubuntu", "Noto Serif CJK SC"))
|
||||
分别设置“中文”和English字体
|
||||
|
||||
// With ranges, the quotation mark is using the Chinese font.
|
||||
#set text(font: ((name: "Noto Serif CJK SC", covers: regex("[\u{00B7}-\u{3134F}]")), "Ubuntu"))
|
||||
分别设置“中文”和English字体
|
||||
|
||||
// With "latin-in-cjk", the quotation mark is also using the Chinese font.
|
||||
#set text(font: ((name: "Ubuntu", covers: "latin-in-cjk"), "Noto Serif CJK SC"))
|
||||
分别设置“中文”和English字体
|
||||
|
||||
--- text-font-covers-numbers ---
|
||||
// Change font only for numbers.
|
||||
#set text(font: (
|
||||
(name: "PT Sans", covers: regex("[0-9]")),
|
||||
"Libertinus Serif"
|
||||
))
|
||||
|
||||
The number 123.
|
||||
|
||||
--- text-font-covers-bad-1 ---
|
||||
// Error: 17-59 coverage regex may only use dot, letters, and character classes
|
||||
// Hint: 17-59 the regex is applied to each letter individually
|
||||
#set text(font: (name: "Ubuntu", covers: regex("20-FFFF")))
|
||||
|
||||
--- text-font-covers-bad-2 ---
|
||||
// Error: 17-65 coverage regex may only use dot, letters, and character classes
|
||||
// Hint: 17-65 the regex is applied to each letter individually
|
||||
#set text(font: (name: "Ubuntu", covers: regex("\u{20}-\u{10}")))
|
||||
|
Loading…
x
Reference in New Issue
Block a user