mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Fix default ignorables (#5099)
This commit is contained in:
parent
d86789c1f7
commit
0343e038d3
@ -425,7 +425,7 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
|||||||
bail!(
|
bail!(
|
||||||
g.span.0,
|
g.span.0,
|
||||||
"the text {} could not be displayed with any font",
|
"the text {} could not be displayed with any font",
|
||||||
text.text[g.range()].repr()
|
TextItemView::full(text).glyph_text(g).repr(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -441,7 +441,7 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
|||||||
|| tables.svg.is_some()
|
|| tables.svg.is_some()
|
||||||
|| tables.colr.is_some();
|
|| tables.colr.is_some();
|
||||||
if !has_color_glyphs {
|
if !has_color_glyphs {
|
||||||
write_normal_text(ctx, pos, TextItemView::all_of(text))?;
|
write_normal_text(ctx, pos, TextItemView::full(text))?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -449,9 +449,9 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
|||||||
text.glyphs.iter().filter(|g| is_color_glyph(&text.font, g)).count();
|
text.glyphs.iter().filter(|g| is_color_glyph(&text.font, g)).count();
|
||||||
|
|
||||||
if color_glyph_count == text.glyphs.len() {
|
if color_glyph_count == text.glyphs.len() {
|
||||||
write_color_glyphs(ctx, pos, TextItemView::all_of(text))?;
|
write_color_glyphs(ctx, pos, TextItemView::full(text))?;
|
||||||
} else if color_glyph_count == 0 {
|
} else if color_glyph_count == 0 {
|
||||||
write_normal_text(ctx, pos, TextItemView::all_of(text))?;
|
write_normal_text(ctx, pos, TextItemView::full(text))?;
|
||||||
} else {
|
} else {
|
||||||
// Otherwise we need to split it in smaller text runs
|
// Otherwise we need to split it in smaller text runs
|
||||||
let mut offset = 0;
|
let mut offset = 0;
|
||||||
@ -493,9 +493,7 @@ fn write_normal_text(
|
|||||||
|
|
||||||
let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default();
|
let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default();
|
||||||
for g in text.glyphs() {
|
for g in text.glyphs() {
|
||||||
let t = text.text();
|
glyph_set.entry(g.id).or_insert_with(|| text.glyph_text(&g));
|
||||||
let segment = &t[g.range()];
|
|
||||||
glyph_set.entry(g.id).or_insert_with(|| segment.into());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let fill_transform = ctx.state.transforms(Size::zero(), pos);
|
let fill_transform = ctx.state.transforms(Size::zero(), pos);
|
||||||
@ -640,9 +638,7 @@ fn write_color_glyphs(
|
|||||||
|
|
||||||
ctx.content.show(Str(&[index]));
|
ctx.content.show(Str(&[index]));
|
||||||
|
|
||||||
glyph_set
|
glyph_set.entry(glyph.id).or_insert_with(|| text.glyph_text(&glyph));
|
||||||
.entry(glyph.id)
|
|
||||||
.or_insert_with(|| text.text()[glyph.range()].into());
|
|
||||||
}
|
}
|
||||||
ctx.content.end_text();
|
ctx.content.end_text();
|
||||||
|
|
||||||
|
@ -8,7 +8,8 @@ use crate::layout::{
|
|||||||
};
|
};
|
||||||
use crate::syntax::Span;
|
use crate::syntax::Span;
|
||||||
use crate::text::{
|
use crate::text::{
|
||||||
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
|
is_default_ignorable, LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes,
|
||||||
|
SpaceElem, TextElem,
|
||||||
};
|
};
|
||||||
use crate::utils::Numeric;
|
use crate::utils::Numeric;
|
||||||
|
|
||||||
|
@ -2,7 +2,6 @@ use std::ops::{Add, Sub};
|
|||||||
|
|
||||||
use az::SaturatingAs;
|
use az::SaturatingAs;
|
||||||
use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed};
|
use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed};
|
||||||
use icu_properties::sets::CodePointSetData;
|
|
||||||
use icu_properties::LineBreak;
|
use icu_properties::LineBreak;
|
||||||
use icu_provider::AsDeserializingBufferProvider;
|
use icu_provider::AsDeserializingBufferProvider;
|
||||||
use icu_provider_adapters::fork::ForkByKeyProvider;
|
use icu_provider_adapters::fork::ForkByKeyProvider;
|
||||||
@ -16,7 +15,7 @@ use crate::engine::Engine;
|
|||||||
use crate::layout::{Abs, Em};
|
use crate::layout::{Abs, Em};
|
||||||
use crate::model::Linebreaks;
|
use crate::model::Linebreaks;
|
||||||
use crate::syntax::link_prefix;
|
use crate::syntax::link_prefix;
|
||||||
use crate::text::{Lang, TextElem};
|
use crate::text::{is_default_ignorable, Lang, TextElem};
|
||||||
|
|
||||||
/// The cost of a line or paragraph layout.
|
/// The cost of a line or paragraph layout.
|
||||||
type Cost = f64;
|
type Cost = f64;
|
||||||
@ -58,12 +57,6 @@ static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
|
|||||||
icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
|
icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
|
||||||
});
|
});
|
||||||
|
|
||||||
/// The set of Unicode default ignorables.
|
|
||||||
static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
|
|
||||||
icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
|
|
||||||
.unwrap()
|
|
||||||
});
|
|
||||||
|
|
||||||
/// A line break opportunity.
|
/// A line break opportunity.
|
||||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||||
pub enum Breakpoint {
|
pub enum Breakpoint {
|
||||||
@ -80,8 +73,7 @@ impl Breakpoint {
|
|||||||
/// Trim a line before this breakpoint.
|
/// Trim a line before this breakpoint.
|
||||||
pub fn trim(self, line: &str) -> &str {
|
pub fn trim(self, line: &str) -> &str {
|
||||||
// Trim default ignorables.
|
// Trim default ignorables.
|
||||||
let ignorable = DEFAULT_IGNORABLE_DATA.as_borrowed();
|
let line = line.trim_end_matches(is_default_ignorable);
|
||||||
let line = line.trim_end_matches(|c| ignorable.contains(c));
|
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
// Trim whitespace.
|
// Trim whitespace.
|
||||||
@ -986,8 +978,3 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Whether a codepoint is Unicode `Default_Ignorable`.
|
|
||||||
pub fn is_default_ignorable(c: char) -> bool {
|
|
||||||
DEFAULT_IGNORABLE_DATA.as_borrowed().contains(c)
|
|
||||||
}
|
|
||||||
|
@ -10,7 +10,7 @@ use comemo::{Track, Tracked, TrackedMut};
|
|||||||
use self::collect::{collect, Item, Segment, SpanMapper};
|
use self::collect::{collect, Item, Segment, SpanMapper};
|
||||||
use self::finalize::finalize;
|
use self::finalize::finalize;
|
||||||
use self::line::{commit, line, Line};
|
use self::line::{commit, line, Line};
|
||||||
use self::linebreak::{is_default_ignorable, linebreak, Breakpoint};
|
use self::linebreak::{linebreak, Breakpoint};
|
||||||
use self::prepare::{prepare, Preparation};
|
use self::prepare::{prepare, Preparation};
|
||||||
use self::shaping::{
|
use self::shaping::{
|
||||||
cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText,
|
cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText,
|
||||||
|
@ -5,7 +5,7 @@ use std::sync::Arc;
|
|||||||
|
|
||||||
use az::SaturatingAs;
|
use az::SaturatingAs;
|
||||||
use ecow::EcoString;
|
use ecow::EcoString;
|
||||||
use rustybuzz::{ShapePlan, UnicodeBuffer};
|
use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer};
|
||||||
use ttf_parser::Tag;
|
use ttf_parser::Tag;
|
||||||
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
||||||
use unicode_script::{Script, UnicodeScript};
|
use unicode_script::{Script, UnicodeScript};
|
||||||
@ -15,8 +15,8 @@ use crate::engine::Engine;
|
|||||||
use crate::foundations::{Smart, StyleChain};
|
use crate::foundations::{Smart, StyleChain};
|
||||||
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
|
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
|
||||||
use crate::text::{
|
use crate::text::{
|
||||||
decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
|
decorate, families, features, is_default_ignorable, variant, Font, FontVariant,
|
||||||
TextElem, TextItem,
|
Glyph, Lang, Region, TextElem, TextItem,
|
||||||
};
|
};
|
||||||
use crate::utils::SliceExt;
|
use crate::utils::SliceExt;
|
||||||
use crate::World;
|
use crate::World;
|
||||||
@ -725,8 +725,11 @@ fn shape_segment<'a>(
|
|||||||
text: &str,
|
text: &str,
|
||||||
mut families: impl Iterator<Item = &'a str> + Clone,
|
mut families: impl Iterator<Item = &'a str> + Clone,
|
||||||
) {
|
) {
|
||||||
// Fonts dont have newlines and tabs.
|
// Don't try shaping newlines, tabs, or default ignorables.
|
||||||
if text.chars().all(|c| c == '\n' || c == '\t') {
|
if text
|
||||||
|
.chars()
|
||||||
|
.all(|c| c == '\n' || c == '\t' || is_default_ignorable(c))
|
||||||
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -774,6 +777,12 @@ fn shape_segment<'a>(
|
|||||||
});
|
});
|
||||||
buffer.guess_segment_properties();
|
buffer.guess_segment_properties();
|
||||||
|
|
||||||
|
// By default, Harfbuzz will create zero-width space glyphs for default
|
||||||
|
// ignorables. This is probably useful for GUI apps that want noticable
|
||||||
|
// effects on the cursor for those, but for us it's not useful and hurts
|
||||||
|
// text extraction.
|
||||||
|
buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES);
|
||||||
|
|
||||||
// Prepare the shape plan. This plan depends on direction, script, language,
|
// Prepare the shape plan. This plan depends on direction, script, language,
|
||||||
// and features, but is independent from the text and can thus be memoized.
|
// and features, but is independent from the text and can thus be memoized.
|
||||||
let plan = create_shape_plan(
|
let plan = create_shape_plan(
|
||||||
|
@ -5,7 +5,7 @@ use ecow::EcoString;
|
|||||||
|
|
||||||
use crate::layout::{Abs, Em};
|
use crate::layout::{Abs, Em};
|
||||||
use crate::syntax::Span;
|
use crate::syntax::Span;
|
||||||
use crate::text::{Font, Lang, Region};
|
use crate::text::{is_default_ignorable, Font, Lang, Region};
|
||||||
use crate::visualize::{FixedStroke, Paint};
|
use crate::visualize::{FixedStroke, Paint};
|
||||||
|
|
||||||
/// A run of shaped text.
|
/// A run of shaped text.
|
||||||
@ -78,7 +78,7 @@ pub struct TextItemView<'a> {
|
|||||||
|
|
||||||
impl<'a> TextItemView<'a> {
|
impl<'a> TextItemView<'a> {
|
||||||
/// Build a TextItemView for the whole contents of a TextItem.
|
/// Build a TextItemView for the whole contents of a TextItem.
|
||||||
pub fn all_of(text: &'a TextItem) -> Self {
|
pub fn full(text: &'a TextItem) -> Self {
|
||||||
Self::from_glyph_range(text, 0..text.glyphs.len())
|
Self::from_glyph_range(text, 0..text.glyphs.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,28 +87,30 @@ impl<'a> TextItemView<'a> {
|
|||||||
TextItemView { item: text, glyph_range }
|
TextItemView { item: text, glyph_range }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Obtains a glyph in this slice, remapping the range that it represents in
|
|
||||||
/// the original text so that it is relative to the start of the slice
|
|
||||||
pub fn glyph_at(&self, index: usize) -> Glyph {
|
|
||||||
let g = &self.item.glyphs[self.glyph_range.start + index];
|
|
||||||
let base = self.text_range().start as u16;
|
|
||||||
Glyph {
|
|
||||||
range: g.range.start - base..g.range.end - base,
|
|
||||||
..*g
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns an iterator over the glyphs of the slice.
|
/// Returns an iterator over the glyphs of the slice.
|
||||||
///
|
///
|
||||||
/// The range of text that each glyph represents is remapped to be relative
|
/// The range of text that each glyph represents is remapped to be relative
|
||||||
/// to the start of the slice.
|
/// to the start of the slice.
|
||||||
pub fn glyphs(&self) -> impl Iterator<Item = Glyph> + '_ {
|
pub fn glyphs(&self) -> impl Iterator<Item = Glyph> + '_ {
|
||||||
(0..self.glyph_range.len()).map(|index| self.glyph_at(index))
|
let first = self.item.glyphs[self.glyph_range.start].range();
|
||||||
|
let last = self.item.glyphs[self.glyph_range.end - 1].range();
|
||||||
|
let base = first.start.min(last.start) as u16;
|
||||||
|
(0..self.glyph_range.len()).map(move |index| {
|
||||||
|
let g = &self.item.glyphs[self.glyph_range.start + index];
|
||||||
|
Glyph {
|
||||||
|
range: g.range.start - base..g.range.end - base,
|
||||||
|
..*g
|
||||||
|
}
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The plain text that this slice represents
|
/// The plain text for the given glyph. This is an approximation since
|
||||||
pub fn text(&self) -> &str {
|
/// glyphs do not correspond 1-1 with codepoints.
|
||||||
&self.item.text[self.text_range()]
|
pub fn glyph_text(&self, glyph: &Glyph) -> EcoString {
|
||||||
|
self.item.text[glyph.range()]
|
||||||
|
.chars()
|
||||||
|
.filter(|&c| !is_default_ignorable(c))
|
||||||
|
.collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The total width of this text slice
|
/// The total width of this text slice
|
||||||
@ -119,12 +121,4 @@ impl<'a> TextItemView<'a> {
|
|||||||
.sum::<Em>()
|
.sum::<Em>()
|
||||||
.at(self.item.size)
|
.at(self.item.size)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The range of text in the original TextItem that this slice corresponds
|
|
||||||
/// to.
|
|
||||||
fn text_range(&self) -> Range<usize> {
|
|
||||||
let first = self.item.glyphs[self.glyph_range.start].range();
|
|
||||||
let last = self.item.glyphs[self.glyph_range.end - 1].range();
|
|
||||||
first.start.min(last.start)..first.end.max(last.end)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -31,6 +31,10 @@ pub use self::space::*;
|
|||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
|
|
||||||
use ecow::{eco_format, EcoString};
|
use ecow::{eco_format, EcoString};
|
||||||
|
use icu_properties::sets::CodePointSetData;
|
||||||
|
use icu_provider::AsDeserializingBufferProvider;
|
||||||
|
use icu_provider_blob::BlobDataProvider;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
use rustybuzz::Feature;
|
use rustybuzz::Feature;
|
||||||
use smallvec::SmallVec;
|
use smallvec::SmallVec;
|
||||||
use ttf_parser::{Rect, Tag};
|
use ttf_parser::{Rect, Tag};
|
||||||
@ -1310,6 +1314,20 @@ cast! {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether a codepoint is Unicode `Default_Ignorable`.
|
||||||
|
pub(crate) fn is_default_ignorable(c: char) -> bool {
|
||||||
|
/// The set of Unicode default ignorables.
|
||||||
|
static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
|
||||||
|
icu_properties::sets::load_default_ignorable_code_point(
|
||||||
|
&BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU)
|
||||||
|
.unwrap()
|
||||||
|
.as_deserializing(),
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
});
|
||||||
|
DEFAULT_IGNORABLE_DATA.as_borrowed().contains(c)
|
||||||
|
}
|
||||||
|
|
||||||
/// Pushes `text` wrapped in LRE/RLE + PDF to `out`.
|
/// Pushes `text` wrapped in LRE/RLE + PDF to `out`.
|
||||||
pub(crate) fn isolate(text: Content, styles: StyleChain, out: &mut Vec<Content>) {
|
pub(crate) fn isolate(text: Content, styles: StyleChain, out: &mut Vec<Content>) {
|
||||||
out.push(TextElem::packed(match TextElem::dir_in(styles) {
|
out.push(TextElem::packed(match TextElem::dir_in(styles) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user