mirror of
https://github.com/typst/typst
synced 2025-05-14 04:56:26 +08:00
Segment by script
This commit is contained in:
parent
56968bc0d6
commit
c3a387b8f7
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -873,6 +873,7 @@ dependencies = [
|
|||||||
"typed-arena",
|
"typed-arena",
|
||||||
"typst-macros",
|
"typst-macros",
|
||||||
"unicode-bidi",
|
"unicode-bidi",
|
||||||
|
"unicode-script",
|
||||||
"unicode-segmentation",
|
"unicode-segmentation",
|
||||||
"unicode-xid",
|
"unicode-xid",
|
||||||
"usvg",
|
"usvg",
|
||||||
|
@ -30,6 +30,7 @@ rustybuzz = "0.4"
|
|||||||
unicode-bidi = "0.3.5"
|
unicode-bidi = "0.3.5"
|
||||||
unicode-segmentation = "1"
|
unicode-segmentation = "1"
|
||||||
unicode-xid = "0.2"
|
unicode-xid = "0.2"
|
||||||
|
unicode-script = "0.5"
|
||||||
xi-unicode = "0.3"
|
xi-unicode = "0.3"
|
||||||
|
|
||||||
# Raster and vector graphics handling
|
# Raster and vector graphics handling
|
||||||
|
BIN
fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
BIN
fonts/IBMPlexSansDevanagari-Regular.ttf
Normal file
Binary file not shown.
@ -1,13 +1,14 @@
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use unicode_bidi::{BidiInfo, Level};
|
use unicode_bidi::{BidiInfo, Level};
|
||||||
|
use unicode_script::{Script, UnicodeScript};
|
||||||
use xi_unicode::LineBreakIterator;
|
use xi_unicode::LineBreakIterator;
|
||||||
|
|
||||||
use super::{shape, Lang, ShapedText, TextNode};
|
use super::{shape, Lang, ShapedText, TextNode};
|
||||||
use crate::font::FontStore;
|
use crate::font::FontStore;
|
||||||
use crate::library::layout::Spacing;
|
use crate::library::layout::Spacing;
|
||||||
use crate::library::prelude::*;
|
use crate::library::prelude::*;
|
||||||
use crate::util::{ArcExt, EcoString, SliceExt};
|
use crate::util::{ArcExt, EcoString};
|
||||||
|
|
||||||
/// Arrange text, spacing and inline-level nodes into a paragraph.
|
/// Arrange text, spacing and inline-level nodes into a paragraph.
|
||||||
#[derive(Hash)]
|
#[derive(Hash)]
|
||||||
@ -437,23 +438,46 @@ fn prepare<'a>(
|
|||||||
_ => None,
|
_ => None,
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut items = vec![];
|
|
||||||
let mut cursor = 0;
|
let mut cursor = 0;
|
||||||
|
let mut items = vec![];
|
||||||
|
|
||||||
// Layout the children and collect them into items.
|
// Layout the children and collect them into items.
|
||||||
for (segment, styles) in segments {
|
for (segment, styles) in segments {
|
||||||
|
let end = cursor + segment.len();
|
||||||
match segment {
|
match segment {
|
||||||
Segment::Text(len) => {
|
Segment::Text(_) => {
|
||||||
// TODO: Also split by script.
|
let mut process = |text, level: Level| {
|
||||||
let mut start = cursor;
|
|
||||||
for (level, count) in bidi.levels[cursor .. cursor + len].group() {
|
|
||||||
let end = start + count;
|
|
||||||
let text = &bidi.text[start .. end];
|
|
||||||
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
|
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
|
||||||
let shaped = shape(&mut ctx.fonts, text, styles, dir);
|
let shaped = shape(&mut ctx.fonts, text, styles, dir);
|
||||||
items.push(Item::Text(shaped));
|
items.push(Item::Text(shaped));
|
||||||
start = end;
|
};
|
||||||
|
|
||||||
|
let mut prev_level = Level::ltr();
|
||||||
|
let mut prev_script = Script::Unknown;
|
||||||
|
|
||||||
|
// Group by embedding level and script.
|
||||||
|
for i in cursor .. end {
|
||||||
|
if !text.is_char_boundary(i) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
let level = bidi.levels[i];
|
||||||
|
let script =
|
||||||
|
text[i ..].chars().next().map_or(Script::Unknown, |c| c.script());
|
||||||
|
|
||||||
|
if level != prev_level || !is_compatible(script, prev_script) {
|
||||||
|
if cursor < i {
|
||||||
|
process(&text[cursor .. i], prev_level);
|
||||||
|
}
|
||||||
|
cursor = i;
|
||||||
|
prev_level = level;
|
||||||
|
prev_script = script;
|
||||||
|
} else if is_generic_script(prev_script) {
|
||||||
|
prev_script = script;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
process(&text[cursor .. end], prev_level);
|
||||||
}
|
}
|
||||||
Segment::Spacing(spacing) => match spacing {
|
Segment::Spacing(spacing) => match spacing {
|
||||||
Spacing::Relative(v) => {
|
Spacing::Relative(v) => {
|
||||||
@ -482,12 +506,22 @@ fn prepare<'a>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
cursor += segment.len();
|
cursor = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Preparation { bidi, items, styles, children: &par.0 })
|
Ok(Preparation { bidi, items, styles, children: &par.0 })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Whether this is not a specific script.
|
||||||
|
fn is_generic_script(script: Script) -> bool {
|
||||||
|
matches!(script, Script::Unknown | Script::Common | Script::Inherited)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether these scripts can be part of the same shape run.
|
||||||
|
fn is_compatible(a: Script, b: Script) -> bool {
|
||||||
|
is_generic_script(a) || is_generic_script(b) || a == b
|
||||||
|
}
|
||||||
|
|
||||||
/// Find suitable linebreaks.
|
/// Find suitable linebreaks.
|
||||||
fn linebreak<'a>(
|
fn linebreak<'a>(
|
||||||
p: &'a Preparation<'a>,
|
p: &'a Preparation<'a>,
|
||||||
|
@ -12,7 +12,6 @@ use crate::util::SliceExt;
|
|||||||
/// This type contains owned or borrowed shaped text runs, which can be
|
/// This type contains owned or borrowed shaped text runs, which can be
|
||||||
/// measured, used to reshape substrings more quickly and converted into a
|
/// measured, used to reshape substrings more quickly and converted into a
|
||||||
/// frame.
|
/// frame.
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct ShapedText<'a> {
|
pub struct ShapedText<'a> {
|
||||||
/// The text that was shaped.
|
/// The text that was shaped.
|
||||||
pub text: &'a str,
|
pub text: &'a str,
|
||||||
@ -269,11 +268,13 @@ impl<'a> ShapedText<'a> {
|
|||||||
// RTL needs offset one because the left side of the range should be
|
// RTL needs offset one because the left side of the range should be
|
||||||
// exclusive and the right side inclusive, contrary to the normal
|
// exclusive and the right side inclusive, contrary to the normal
|
||||||
// behaviour of ranges.
|
// behaviour of ranges.
|
||||||
if !ltr {
|
self.glyphs[idx].safe_to_break.then(|| idx + (!ltr) as usize)
|
||||||
idx += 1;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.glyphs[idx].safe_to_break.then(|| idx)
|
impl Debug for ShapedText<'_> {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
|
self.text.fmt(f)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,12 +103,6 @@ where
|
|||||||
|
|
||||||
/// Additional methods for slices.
|
/// Additional methods for slices.
|
||||||
pub trait SliceExt<T> {
|
pub trait SliceExt<T> {
|
||||||
/// Find consecutive runs of the same elements in a slice and yield for
|
|
||||||
/// each such run the element and number of times it appears.
|
|
||||||
fn group(&self) -> Group<'_, T>
|
|
||||||
where
|
|
||||||
T: PartialEq;
|
|
||||||
|
|
||||||
/// Split a slice into consecutive runs with the same key and yield for
|
/// Split a slice into consecutive runs with the same key and yield for
|
||||||
/// each such run the key and the slice of elements with that key.
|
/// each such run the key and the slice of elements with that key.
|
||||||
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
|
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F>
|
||||||
@ -118,35 +112,11 @@ pub trait SliceExt<T> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T> SliceExt<T> for [T] {
|
impl<T> SliceExt<T> for [T] {
|
||||||
fn group(&self) -> Group<'_, T> {
|
|
||||||
Group { slice: self }
|
|
||||||
}
|
|
||||||
|
|
||||||
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
|
fn group_by_key<K, F>(&self, f: F) -> GroupByKey<'_, T, F> {
|
||||||
GroupByKey { slice: self, f }
|
GroupByKey { slice: self, f }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// This struct is created by [`SliceExt::group`].
|
|
||||||
pub struct Group<'a, T> {
|
|
||||||
slice: &'a [T],
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, T> Iterator for Group<'a, T>
|
|
||||||
where
|
|
||||||
T: PartialEq,
|
|
||||||
{
|
|
||||||
type Item = (&'a T, usize);
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
let mut iter = self.slice.iter();
|
|
||||||
let first = iter.next()?;
|
|
||||||
let count = 1 + iter.take_while(|&t| t == first).count();
|
|
||||||
self.slice = &self.slice[count ..];
|
|
||||||
Some((first, count))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// This struct is created by [`SliceExt::group_by_key`].
|
/// This struct is created by [`SliceExt::group_by_key`].
|
||||||
pub struct GroupByKey<'a, T, F> {
|
pub struct GroupByKey<'a, T, F> {
|
||||||
slice: &'a [T],
|
slice: &'a [T],
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 2.6 KiB |
Binary file not shown.
Before Width: | Height: | Size: 6.3 KiB After Width: | Height: | Size: 6.3 KiB |
18
tests/typ/text/shaping.typ
Normal file
18
tests/typ/text/shaping.typ
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
// Test shaping quirks.
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test separation by script.
|
||||||
|
ABCअपार्टमेंट
|
||||||
|
|
||||||
|
// This is what it should look like.
|
||||||
|
अपार्टमेंट
|
||||||
|
|
||||||
|
// This (without the spaces) is how it would look
|
||||||
|
// if we didn't separate by script.
|
||||||
|
अ पा र् ट में ट
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test that RTL safe-to-break doesn't panic even though newline
|
||||||
|
// doesn't exist in shaping output.
|
||||||
|
#set text(dir: rtl, "Noto Serif Hebrew")
|
||||||
|
\ ט
|
Loading…
x
Reference in New Issue
Block a user