mirror of
https://github.com/typst/typst
synced 2025-05-18 11:05:28 +08:00
Linebreaking for links
This commit is contained in:
parent
4c75adbb04
commit
29130a26f8
@ -5,6 +5,7 @@ use icu_provider_blob::BlobDataProvider;
|
|||||||
use icu_segmenter::LineSegmenter;
|
use icu_segmenter::LineSegmenter;
|
||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use typst::doc::Lang;
|
use typst::doc::Lang;
|
||||||
|
use typst::syntax::link_prefix;
|
||||||
|
|
||||||
use super::TextElem;
|
use super::TextElem;
|
||||||
use crate::layout::Preparation;
|
use crate::layout::Preparation;
|
||||||
@ -82,25 +83,40 @@ pub(crate) fn breakpoints<'a>(
|
|||||||
p: &'a Preparation<'a>,
|
p: &'a Preparation<'a>,
|
||||||
mut f: impl FnMut(usize, Breakpoint),
|
mut f: impl FnMut(usize, Breakpoint),
|
||||||
) {
|
) {
|
||||||
|
let text = p.bidi.text;
|
||||||
|
let hyphenate = p.hyphenate != Some(false);
|
||||||
let lb = LINEBREAK_DATA.as_borrowed();
|
let lb = LINEBREAK_DATA.as_borrowed();
|
||||||
let segmenter = match p.lang {
|
let segmenter = match p.lang {
|
||||||
Some(Lang::CHINESE | Lang::JAPANESE) => &CJ_SEGMENTER,
|
Some(Lang::CHINESE | Lang::JAPANESE) => &CJ_SEGMENTER,
|
||||||
_ => &SEGMENTER,
|
_ => &SEGMENTER,
|
||||||
};
|
};
|
||||||
|
|
||||||
let hyphenate = p.hyphenate != Some(false);
|
|
||||||
let mut last = 0;
|
let mut last = 0;
|
||||||
|
let mut iter = segmenter.segment_str(text).peekable();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
// Special case for links. UAX #14 doesn't handle them well.
|
||||||
|
let (head, tail) = text.split_at(last);
|
||||||
|
if head.ends_with("://") || tail.starts_with("www.") {
|
||||||
|
let (link, _) = link_prefix(tail);
|
||||||
|
let end = last + link.len();
|
||||||
|
linebreak_link(link, |i| f(last + i, Breakpoint::Normal));
|
||||||
|
while iter.peek().map_or(false, |&p| p <= end) {
|
||||||
|
iter.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the UAX #14 linebreak opportunities.
|
||||||
|
let Some(point) = iter.next() else { break };
|
||||||
|
|
||||||
// Walk over all UAX #14 linebreak opportunities.
|
|
||||||
for point in segmenter.segment_str(p.bidi.text) {
|
|
||||||
// Skip breakpoint if there is no char before it. icu4x generates one
|
// Skip breakpoint if there is no char before it. icu4x generates one
|
||||||
// at offset 0, but we don't want it.
|
// at offset 0, but we don't want it.
|
||||||
let Some(c) = p.bidi.text[..point].chars().next_back() else { continue };
|
let Some(c) = text[..point].chars().next_back() else { continue };
|
||||||
|
|
||||||
// Find out whether the last break was mandatory by checking against
|
// Find out whether the last break was mandatory by checking against
|
||||||
// rules LB4 and LB5, special-casing the end of text according to LB3.
|
// rules LB4 and LB5, special-casing the end of text according to LB3.
|
||||||
// See also: https://docs.rs/icu_segmenter/latest/icu_segmenter/struct.LineSegmenter.html
|
// See also: https://docs.rs/icu_segmenter/latest/icu_segmenter/struct.LineSegmenter.html
|
||||||
let breakpoint = if point == p.bidi.text.len() {
|
let breakpoint = if point == text.len() {
|
||||||
Breakpoint::Mandatory
|
Breakpoint::Mandatory
|
||||||
} else {
|
} else {
|
||||||
match lb.get(c) {
|
match lb.get(c) {
|
||||||
@ -121,8 +137,7 @@ pub(crate) fn breakpoints<'a>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Extract a hyphenatable "word".
|
// Extract a hyphenatable "word".
|
||||||
let word =
|
let word = &text[last..point].trim_end_matches(|c: char| !c.is_alphabetic());
|
||||||
&p.bidi.text[last..point].trim_end_matches(|c: char| !c.is_alphabetic());
|
|
||||||
if word.is_empty() {
|
if word.is_empty() {
|
||||||
break 'hyphenate;
|
break 'hyphenate;
|
||||||
}
|
}
|
||||||
@ -166,6 +181,69 @@ pub(crate) fn breakpoints<'a>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Produce linebreak opportunities for a link.
///
/// Calls `f` with every byte offset into `link` at which a line break is
/// allowed. Offsets are reported in strictly increasing order and, for a
/// non-empty link, the last one is always `link.len()`. An empty link yields
/// no opportunities at all.
///
/// The link is cut into pieces at character-class boundaries (see the loop
/// below). A piece shorter than 16 bytes is kept together and only its end is
/// reported; a longer piece (e.g. a hash in the URL) may break after every
/// character.
fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
    /// Coarse character classification used to find piece boundaries.
    #[derive(Clone, Copy, PartialEq, Eq)]
    enum Class {
        Alphabetic,
        Digit,
        Open,
        Other,
    }

    impl Class {
        /// Classify a single character of the link.
        fn of(c: char) -> Self {
            if c.is_alphabetic() {
                Class::Alphabetic
            } else if c.is_numeric() {
                Class::Digit
            } else if matches!(c, '(' | '[') {
                Class::Open
            } else {
                Class::Other
            }
        }
    }

    // Without this guard, the final `emit(link.len())` below would report a
    // break at offset 0 of an empty link, i.e. a break with nothing before it.
    if link.is_empty() {
        return;
    }

    // Byte offset up to which opportunities have already been reported.
    let mut offset = 0;
    let mut emit = |end: usize| {
        let piece = &link[offset..end];
        if piece.len() < 16 {
            // For bearably long segments, emit them as one.
            offset = end;
            f(offset);
        } else {
            // If it gets very long (e.g. a hash in the URL), just allow a
            // break at every char.
            for c in piece.chars() {
                offset += c.len_utf8();
                f(offset);
            }
        }
    };

    let mut prev = Class::Other;
    for (end, c) in link.char_indices() {
        let class = Class::of(c);

        // A piece ends right before `c` when
        // - both the previous char and `c` are "other" chars, or
        // - `c` is not an "other" char and its class differs from the
        //   previous char's class (alphabetic <-> numeric, anything -> opening
        //   delimiter, other -> alphabetic/numeric).
        // Going from alphabetic/numeric to "other" is not a boundary, so
        // breaks fall after punctuation rather than before it. There is never
        // a boundary directly after an opening delimiter.
        let boundary = if class == Class::Other {
            prev == Class::Other
        } else {
            class != prev
        };

        if end > 0 && prev != Class::Open && boundary {
            emit(end);
        }

        prev = class;
    }

    // The remainder of the link forms the last piece.
    emit(link.len());
}
|
||||||
|
|
||||||
/// Whether hyphenation is enabled at the given offset.
|
/// Whether hyphenation is enabled at the given offset.
|
||||||
fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
|
fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
|
||||||
p.hyphenate
|
p.hyphenate
|
||||||
|
@ -253,43 +253,16 @@ impl Lexer<'_> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn link(&mut self) -> SyntaxKind {
|
fn link(&mut self) -> SyntaxKind {
|
||||||
let mut brackets = Vec::new();
|
let (link, balanced) = link_prefix(self.s.after());
|
||||||
|
self.s.jump(self.s.cursor() + link.len());
|
||||||
|
|
||||||
#[rustfmt::skip]
|
if !balanced {
|
||||||
self.s.eat_while(|c: char| {
|
|
||||||
match c {
|
|
||||||
| '0' ..= '9'
|
|
||||||
| 'a' ..= 'z'
|
|
||||||
| 'A' ..= 'Z'
|
|
||||||
| '!' | '#' | '$' | '%' | '&' | '*' | '+'
|
|
||||||
| ',' | '-' | '.' | '/' | ':' | ';' | '='
|
|
||||||
| '?' | '@' | '_' | '~' | '\'' => true,
|
|
||||||
'[' => {
|
|
||||||
brackets.push(SyntaxKind::LeftBracket);
|
|
||||||
true
|
|
||||||
}
|
|
||||||
'(' => {
|
|
||||||
brackets.push(SyntaxKind::LeftParen);
|
|
||||||
true
|
|
||||||
}
|
|
||||||
']' => brackets.pop() == Some(SyntaxKind::LeftBracket),
|
|
||||||
')' => brackets.pop() == Some(SyntaxKind::LeftParen),
|
|
||||||
_ => false,
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
if !brackets.is_empty() {
|
|
||||||
return self.error(
|
return self.error(
|
||||||
"automatic links cannot contain unbalanced brackets, \
|
"automatic links cannot contain unbalanced brackets, \
|
||||||
use the `link` function instead",
|
use the `link` function instead",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Don't include the trailing characters likely to be part of text.
|
|
||||||
while matches!(self.s.scout(-1), Some('!' | ',' | '.' | ':' | ';' | '?' | '\'')) {
|
|
||||||
self.s.uneat();
|
|
||||||
}
|
|
||||||
|
|
||||||
SyntaxKind::Link
|
SyntaxKind::Link
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -662,6 +635,43 @@ pub fn is_newline(character: char) -> bool {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Extracts a prefix of the text that is a link and also returns whether the
/// parentheses and brackets in the link were balanced.
///
/// The prefix extends over ASCII letters, digits and a fixed set of
/// punctuation. Opening `(` / `[` are always accepted; a closing `)` / `]` is
/// only accepted when it matches the most recently opened delimiter and
/// otherwise ends the link. Trailing punctuation that is likely part of the
/// surrounding text (`!`, `,`, `.`, `:`, `;`, `?`, `'`) is then stripped from
/// the end of the prefix.
///
/// The returned flag is `true` if every delimiter opened inside the scanned
/// prefix was also closed.
pub fn link_prefix(text: &str) -> (&str, bool) {
    // Stack of currently open delimiters.
    let mut brackets = Vec::new();

    // Byte offset of the first character that cannot be part of the link.
    let mut end = text.len();
    for (i, c) in text.char_indices() {
        #[rustfmt::skip]
        let accepted = match c {
            | '0' ..= '9'
            | 'a' ..= 'z'
            | 'A' ..= 'Z'
            | '!' | '#' | '$' | '%' | '&' | '*' | '+'
            | ',' | '-' | '.' | '/' | ':' | ';' | '='
            | '?' | '@' | '_' | '~' | '\'' => true,
            '[' => {
                brackets.push(b'[');
                true
            }
            '(' => {
                brackets.push(b'(');
                true
            }
            // Only consume a closer that matches the innermost opener. A
            // mismatched closer must leave the stack untouched so that the
            // still-open delimiter is reported as unbalanced.
            ']' if brackets.last() == Some(&b'[') => {
                brackets.pop();
                true
            }
            ')' if brackets.last() == Some(&b'(') => {
                brackets.pop();
                true
            }
            _ => false,
        };

        if !accepted {
            end = i;
            break;
        }
    }

    // Don't include the trailing characters likely to be part of text.
    let link = text[..end]
        .trim_end_matches(|c: char| matches!(c, '!' | ',' | '.' | ':' | ';' | '?' | '\''));

    (link, brackets.is_empty())
}
|
||||||
|
|
||||||
/// Split text at newlines.
|
/// Split text at newlines.
|
||||||
pub(super) fn split_newlines(text: &str) -> Vec<&str> {
|
pub(super) fn split_newlines(text: &str) -> Vec<&str> {
|
||||||
let mut s = Scanner::new(text);
|
let mut s = Scanner::new(text);
|
||||||
|
@ -15,7 +15,7 @@ mod span;
|
|||||||
pub use self::file::{FileId, PackageSpec, PackageVersion, VirtualPath};
|
pub use self::file::{FileId, PackageSpec, PackageVersion, VirtualPath};
|
||||||
pub use self::highlight::{highlight, highlight_html, Tag};
|
pub use self::highlight::{highlight, highlight_html, Tag};
|
||||||
pub use self::kind::SyntaxKind;
|
pub use self::kind::SyntaxKind;
|
||||||
pub use self::lexer::{is_id_continue, is_id_start, is_ident, is_newline};
|
pub use self::lexer::{is_id_continue, is_id_start, is_ident, is_newline, link_prefix};
|
||||||
pub use self::node::{LinkedChildren, LinkedNode, SyntaxError, SyntaxNode};
|
pub use self::node::{LinkedChildren, LinkedNode, SyntaxError, SyntaxNode};
|
||||||
pub use self::parser::{parse, parse_code, parse_math};
|
pub use self::parser::{parse, parse_code, parse_math};
|
||||||
pub use self::source::Source;
|
pub use self::source::Source;
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 70 KiB After Width: | Height: | Size: 70 KiB |
BIN
tests/ref/text/linebreak-link.png
Normal file
BIN
tests/ref/text/linebreak-link.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 65 KiB |
16
tests/typ/text/linebreak-link.typ
Normal file
16
tests/typ/text/linebreak-link.typ
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
// Test linebreaking of links.
|
||||||
|
|
||||||
|
---
|
||||||
|
#link("https://example.com/(ab") \
|
||||||
|
#link("https://example.com/(ab)") \
|
||||||
|
#link("https://example.com/(paren)") \
|
||||||
|
#link("https://example.com/paren)") \
|
||||||
|
#link("https://hi.com/%%%%%%%%abcdef") \
|
||||||
|
|
||||||
|
---
|
||||||
|
#set page(width: 240pt)
|
||||||
|
#set par(justify: true)
|
||||||
|
|
||||||
|
Here's a link https://url.com/data/extern12840%data_urlenc and then there are more
|
||||||
|
links #link("www.url.com/data/extern12840%data_urlenc") in my text of links
|
||||||
|
http://mydataurl/hash/12098541029831025981024980124124214/incremental/progress%linkdata_information_setup_my_link_just_never_stops_going/on?query=false
|
Loading…
x
Reference in New Issue
Block a user