mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Fix default ignorables (#5099)
This commit is contained in:
parent
d86789c1f7
commit
0343e038d3
@ -425,7 +425,7 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
||||
bail!(
|
||||
g.span.0,
|
||||
"the text {} could not be displayed with any font",
|
||||
text.text[g.range()].repr()
|
||||
TextItemView::full(text).glyph_text(g).repr(),
|
||||
);
|
||||
}
|
||||
}
|
||||
@ -441,7 +441,7 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
||||
|| tables.svg.is_some()
|
||||
|| tables.colr.is_some();
|
||||
if !has_color_glyphs {
|
||||
write_normal_text(ctx, pos, TextItemView::all_of(text))?;
|
||||
write_normal_text(ctx, pos, TextItemView::full(text))?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@ -449,9 +449,9 @@ fn write_text(ctx: &mut Builder, pos: Point, text: &TextItem) -> SourceResult<()
|
||||
text.glyphs.iter().filter(|g| is_color_glyph(&text.font, g)).count();
|
||||
|
||||
if color_glyph_count == text.glyphs.len() {
|
||||
write_color_glyphs(ctx, pos, TextItemView::all_of(text))?;
|
||||
write_color_glyphs(ctx, pos, TextItemView::full(text))?;
|
||||
} else if color_glyph_count == 0 {
|
||||
write_normal_text(ctx, pos, TextItemView::all_of(text))?;
|
||||
write_normal_text(ctx, pos, TextItemView::full(text))?;
|
||||
} else {
|
||||
// Otherwise we need to split it in smaller text runs
|
||||
let mut offset = 0;
|
||||
@ -493,9 +493,7 @@ fn write_normal_text(
|
||||
|
||||
let glyph_set = ctx.resources.glyph_sets.entry(text.item.font.clone()).or_default();
|
||||
for g in text.glyphs() {
|
||||
let t = text.text();
|
||||
let segment = &t[g.range()];
|
||||
glyph_set.entry(g.id).or_insert_with(|| segment.into());
|
||||
glyph_set.entry(g.id).or_insert_with(|| text.glyph_text(&g));
|
||||
}
|
||||
|
||||
let fill_transform = ctx.state.transforms(Size::zero(), pos);
|
||||
@ -640,9 +638,7 @@ fn write_color_glyphs(
|
||||
|
||||
ctx.content.show(Str(&[index]));
|
||||
|
||||
glyph_set
|
||||
.entry(glyph.id)
|
||||
.or_insert_with(|| text.text()[glyph.range()].into());
|
||||
glyph_set.entry(glyph.id).or_insert_with(|| text.glyph_text(&glyph));
|
||||
}
|
||||
ctx.content.end_text();
|
||||
|
||||
|
@ -8,7 +8,8 @@ use crate::layout::{
|
||||
};
|
||||
use crate::syntax::Span;
|
||||
use crate::text::{
|
||||
LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes, SpaceElem, TextElem,
|
||||
is_default_ignorable, LinebreakElem, SmartQuoteElem, SmartQuoter, SmartQuotes,
|
||||
SpaceElem, TextElem,
|
||||
};
|
||||
use crate::utils::Numeric;
|
||||
|
||||
|
@ -2,7 +2,6 @@ use std::ops::{Add, Sub};
|
||||
|
||||
use az::SaturatingAs;
|
||||
use icu_properties::maps::{CodePointMapData, CodePointMapDataBorrowed};
|
||||
use icu_properties::sets::CodePointSetData;
|
||||
use icu_properties::LineBreak;
|
||||
use icu_provider::AsDeserializingBufferProvider;
|
||||
use icu_provider_adapters::fork::ForkByKeyProvider;
|
||||
@ -16,7 +15,7 @@ use crate::engine::Engine;
|
||||
use crate::layout::{Abs, Em};
|
||||
use crate::model::Linebreaks;
|
||||
use crate::syntax::link_prefix;
|
||||
use crate::text::{Lang, TextElem};
|
||||
use crate::text::{is_default_ignorable, Lang, TextElem};
|
||||
|
||||
/// The cost of a line or paragraph layout.
|
||||
type Cost = f64;
|
||||
@ -58,12 +57,6 @@ static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
|
||||
icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
|
||||
});
|
||||
|
||||
/// The set of Unicode default ignorables.
|
||||
static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
|
||||
icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// A line break opportunity.
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
pub enum Breakpoint {
|
||||
@ -80,8 +73,7 @@ impl Breakpoint {
|
||||
/// Trim a line before this breakpoint.
|
||||
pub fn trim(self, line: &str) -> &str {
|
||||
// Trim default ignorables.
|
||||
let ignorable = DEFAULT_IGNORABLE_DATA.as_borrowed();
|
||||
let line = line.trim_end_matches(|c| ignorable.contains(c));
|
||||
let line = line.trim_end_matches(is_default_ignorable);
|
||||
|
||||
match self {
|
||||
// Trim whitespace.
|
||||
@ -986,8 +978,3 @@ where
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether a codepoint is Unicode `Default_Ignorable`.
|
||||
pub fn is_default_ignorable(c: char) -> bool {
|
||||
DEFAULT_IGNORABLE_DATA.as_borrowed().contains(c)
|
||||
}
|
||||
|
@ -10,7 +10,7 @@ use comemo::{Track, Tracked, TrackedMut};
|
||||
use self::collect::{collect, Item, Segment, SpanMapper};
|
||||
use self::finalize::finalize;
|
||||
use self::line::{commit, line, Line};
|
||||
use self::linebreak::{is_default_ignorable, linebreak, Breakpoint};
|
||||
use self::linebreak::{linebreak, Breakpoint};
|
||||
use self::prepare::{prepare, Preparation};
|
||||
use self::shaping::{
|
||||
cjk_punct_style, is_of_cj_script, shape_range, ShapedGlyph, ShapedText,
|
||||
|
@ -5,7 +5,7 @@ use std::sync::Arc;
|
||||
|
||||
use az::SaturatingAs;
|
||||
use ecow::EcoString;
|
||||
use rustybuzz::{ShapePlan, UnicodeBuffer};
|
||||
use rustybuzz::{BufferFlags, ShapePlan, UnicodeBuffer};
|
||||
use ttf_parser::Tag;
|
||||
use unicode_bidi::{BidiInfo, Level as BidiLevel};
|
||||
use unicode_script::{Script, UnicodeScript};
|
||||
@ -15,8 +15,8 @@ use crate::engine::Engine;
|
||||
use crate::foundations::{Smart, StyleChain};
|
||||
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
|
||||
use crate::text::{
|
||||
decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
|
||||
TextElem, TextItem,
|
||||
decorate, families, features, is_default_ignorable, variant, Font, FontVariant,
|
||||
Glyph, Lang, Region, TextElem, TextItem,
|
||||
};
|
||||
use crate::utils::SliceExt;
|
||||
use crate::World;
|
||||
@ -725,8 +725,11 @@ fn shape_segment<'a>(
|
||||
text: &str,
|
||||
mut families: impl Iterator<Item = &'a str> + Clone,
|
||||
) {
|
||||
// Fonts dont have newlines and tabs.
|
||||
if text.chars().all(|c| c == '\n' || c == '\t') {
|
||||
// Don't try shaping newlines, tabs, or default ignorables.
|
||||
if text
|
||||
.chars()
|
||||
.all(|c| c == '\n' || c == '\t' || is_default_ignorable(c))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@ -774,6 +777,12 @@ fn shape_segment<'a>(
|
||||
});
|
||||
buffer.guess_segment_properties();
|
||||
|
||||
// By default, Harfbuzz will create zero-width space glyphs for default
|
||||
// ignorables. This is probably useful for GUI apps that want noticable
|
||||
// effects on the cursor for those, but for us it's not useful and hurts
|
||||
// text extraction.
|
||||
buffer.set_flags(BufferFlags::REMOVE_DEFAULT_IGNORABLES);
|
||||
|
||||
// Prepare the shape plan. This plan depends on direction, script, language,
|
||||
// and features, but is independent from the text and can thus be memoized.
|
||||
let plan = create_shape_plan(
|
||||
|
@ -5,7 +5,7 @@ use ecow::EcoString;
|
||||
|
||||
use crate::layout::{Abs, Em};
|
||||
use crate::syntax::Span;
|
||||
use crate::text::{Font, Lang, Region};
|
||||
use crate::text::{is_default_ignorable, Font, Lang, Region};
|
||||
use crate::visualize::{FixedStroke, Paint};
|
||||
|
||||
/// A run of shaped text.
|
||||
@ -78,7 +78,7 @@ pub struct TextItemView<'a> {
|
||||
|
||||
impl<'a> TextItemView<'a> {
|
||||
/// Build a TextItemView for the whole contents of a TextItem.
|
||||
pub fn all_of(text: &'a TextItem) -> Self {
|
||||
pub fn full(text: &'a TextItem) -> Self {
|
||||
Self::from_glyph_range(text, 0..text.glyphs.len())
|
||||
}
|
||||
|
||||
@ -87,28 +87,30 @@ impl<'a> TextItemView<'a> {
|
||||
TextItemView { item: text, glyph_range }
|
||||
}
|
||||
|
||||
/// Obtains a glyph in this slice, remapping the range that it represents in
|
||||
/// the original text so that it is relative to the start of the slice
|
||||
pub fn glyph_at(&self, index: usize) -> Glyph {
|
||||
let g = &self.item.glyphs[self.glyph_range.start + index];
|
||||
let base = self.text_range().start as u16;
|
||||
Glyph {
|
||||
range: g.range.start - base..g.range.end - base,
|
||||
..*g
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over the glyphs of the slice.
|
||||
///
|
||||
/// The range of text that each glyph represents is remapped to be relative
|
||||
/// to the start of the slice.
|
||||
pub fn glyphs(&self) -> impl Iterator<Item = Glyph> + '_ {
|
||||
(0..self.glyph_range.len()).map(|index| self.glyph_at(index))
|
||||
let first = self.item.glyphs[self.glyph_range.start].range();
|
||||
let last = self.item.glyphs[self.glyph_range.end - 1].range();
|
||||
let base = first.start.min(last.start) as u16;
|
||||
(0..self.glyph_range.len()).map(move |index| {
|
||||
let g = &self.item.glyphs[self.glyph_range.start + index];
|
||||
Glyph {
|
||||
range: g.range.start - base..g.range.end - base,
|
||||
..*g
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// The plain text that this slice represents
|
||||
pub fn text(&self) -> &str {
|
||||
&self.item.text[self.text_range()]
|
||||
/// The plain text for the given glyph. This is an approximation since
|
||||
/// glyphs do not correspond 1-1 with codepoints.
|
||||
pub fn glyph_text(&self, glyph: &Glyph) -> EcoString {
|
||||
self.item.text[glyph.range()]
|
||||
.chars()
|
||||
.filter(|&c| !is_default_ignorable(c))
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// The total width of this text slice
|
||||
@ -119,12 +121,4 @@ impl<'a> TextItemView<'a> {
|
||||
.sum::<Em>()
|
||||
.at(self.item.size)
|
||||
}
|
||||
|
||||
/// The range of text in the original TextItem that this slice corresponds
|
||||
/// to.
|
||||
fn text_range(&self) -> Range<usize> {
|
||||
let first = self.item.glyphs[self.glyph_range.start].range();
|
||||
let last = self.item.glyphs[self.glyph_range.end - 1].range();
|
||||
first.start.min(last.start)..first.end.max(last.end)
|
||||
}
|
||||
}
|
||||
|
@ -31,6 +31,10 @@ pub use self::space::*;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
|
||||
use ecow::{eco_format, EcoString};
|
||||
use icu_properties::sets::CodePointSetData;
|
||||
use icu_provider::AsDeserializingBufferProvider;
|
||||
use icu_provider_blob::BlobDataProvider;
|
||||
use once_cell::sync::Lazy;
|
||||
use rustybuzz::Feature;
|
||||
use smallvec::SmallVec;
|
||||
use ttf_parser::{Rect, Tag};
|
||||
@ -1310,6 +1314,20 @@ cast! {
|
||||
},
|
||||
}
|
||||
|
||||
/// Whether a codepoint is Unicode `Default_Ignorable`.
|
||||
pub(crate) fn is_default_ignorable(c: char) -> bool {
|
||||
/// The set of Unicode default ignorables.
|
||||
static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
|
||||
icu_properties::sets::load_default_ignorable_code_point(
|
||||
&BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU)
|
||||
.unwrap()
|
||||
.as_deserializing(),
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
DEFAULT_IGNORABLE_DATA.as_borrowed().contains(c)
|
||||
}
|
||||
|
||||
/// Pushes `text` wrapped in LRE/RLE + PDF to `out`.
|
||||
pub(crate) fn isolate(text: Content, styles: StyleChain, out: &mut Vec<Content>) {
|
||||
out.push(TextElem::packed(match TextElem::dir_in(styles) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user