mirror of
https://github.com/typst/typst
synced 2025-05-14 17:15:28 +08:00
Better smartquotes (#4849)
This commit is contained in:
parent
4e4c5175e5
commit
ef4482ce4b
@ -16,8 +16,6 @@ use crate::utils::Numeric;
|
|||||||
// paragraph's full text.
|
// paragraph's full text.
|
||||||
const SPACING_REPLACE: &str = " "; // Space
|
const SPACING_REPLACE: &str = " "; // Space
|
||||||
const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
|
const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character
|
||||||
const SPACING_REPLACE_CHAR: char = ' ';
|
|
||||||
const OBJ_REPLACE_CHAR: char = '\u{FFFC}';
|
|
||||||
|
|
||||||
// Unicode BiDi control characters.
|
// Unicode BiDi control characters.
|
||||||
const LTR_EMBEDDING: &str = "\u{202A}";
|
const LTR_EMBEDDING: &str = "\u{202A}";
|
||||||
@ -125,8 +123,8 @@ pub fn collect<'a>(
|
|||||||
consecutive: bool,
|
consecutive: bool,
|
||||||
) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
|
) -> SourceResult<(String, Vec<Segment<'a>>, SpanMapper)> {
|
||||||
let mut collector = Collector::new(2 + children.len());
|
let mut collector = Collector::new(2 + children.len());
|
||||||
let mut iter = children.iter(styles).peekable();
|
|
||||||
let mut locator = locator.split();
|
let mut locator = locator.split();
|
||||||
|
let mut quoter = SmartQuoter::new();
|
||||||
|
|
||||||
let outer_dir = TextElem::dir_in(*styles);
|
let outer_dir = TextElem::dir_in(*styles);
|
||||||
let first_line_indent = ParElem::first_line_indent_in(*styles);
|
let first_line_indent = ParElem::first_line_indent_in(*styles);
|
||||||
@ -144,7 +142,7 @@ pub fn collect<'a>(
|
|||||||
collector.spans.push(1, Span::detached());
|
collector.spans.push(1, Span::detached());
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some((child, styles)) = iter.next() {
|
for (child, styles) in children.iter(styles) {
|
||||||
let prev_len = collector.full.len();
|
let prev_len = collector.full.len();
|
||||||
|
|
||||||
if child.is::<SpaceElem>() {
|
if child.is::<SpaceElem>() {
|
||||||
@ -191,32 +189,16 @@ pub fn collect<'a>(
|
|||||||
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
} else if let Some(elem) = child.to_packed::<SmartQuoteElem>() {
|
||||||
let double = elem.double(styles);
|
let double = elem.double(styles);
|
||||||
if elem.enabled(styles) {
|
if elem.enabled(styles) {
|
||||||
let quotes = SmartQuotes::new(
|
let quotes = SmartQuotes::get(
|
||||||
elem.quotes(styles),
|
elem.quotes(styles),
|
||||||
TextElem::lang_in(styles),
|
TextElem::lang_in(styles),
|
||||||
TextElem::region_in(styles),
|
TextElem::region_in(styles),
|
||||||
elem.alternative(styles),
|
elem.alternative(styles),
|
||||||
);
|
);
|
||||||
let peeked = iter.peek().and_then(|(child, _)| {
|
let before =
|
||||||
if let Some(elem) = child.to_packed::<TextElem>() {
|
collector.full.chars().rev().find(|&c| !is_default_ignorable(c));
|
||||||
elem.text().chars().find(|c| !is_default_ignorable(*c))
|
let quote = quoter.quote(before, &quotes, double);
|
||||||
} else if child.is::<SmartQuoteElem>() {
|
collector.push_text(quote, styles);
|
||||||
Some('"')
|
|
||||||
} else if child.is::<SpaceElem>()
|
|
||||||
|| child.is::<HElem>()
|
|
||||||
|| child.is::<LinebreakElem>()
|
|
||||||
// This is a temporary hack. We should rather skip these
|
|
||||||
// and peek at the next child.
|
|
||||||
|| child.is::<TagElem>()
|
|
||||||
{
|
|
||||||
Some(SPACING_REPLACE_CHAR)
|
|
||||||
} else {
|
|
||||||
Some(OBJ_REPLACE_CHAR)
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
let quote = collector.quoter.quote(&quotes, double, peeked);
|
|
||||||
collector.push_quote(quote, styles);
|
|
||||||
} else {
|
} else {
|
||||||
collector.push_text(if double { "\"" } else { "'" }, styles);
|
collector.push_text(if double { "\"" } else { "'" }, styles);
|
||||||
}
|
}
|
||||||
@ -261,7 +243,6 @@ struct Collector<'a> {
|
|||||||
full: String,
|
full: String,
|
||||||
segments: Vec<Segment<'a>>,
|
segments: Vec<Segment<'a>>,
|
||||||
spans: SpanMapper,
|
spans: SpanMapper,
|
||||||
quoter: SmartQuoter,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Collector<'a> {
|
impl<'a> Collector<'a> {
|
||||||
@ -270,13 +251,12 @@ impl<'a> Collector<'a> {
|
|||||||
full: String::new(),
|
full: String::new(),
|
||||||
segments: Vec::with_capacity(capacity),
|
segments: Vec::with_capacity(capacity),
|
||||||
spans: SpanMapper::new(),
|
spans: SpanMapper::new(),
|
||||||
quoter: SmartQuoter::new(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
|
fn push_text(&mut self, text: &str, styles: StyleChain<'a>) {
|
||||||
self.full.push_str(text);
|
self.full.push_str(text);
|
||||||
self.push_segment(Segment::Text(text.len(), styles), false);
|
self.push_segment(Segment::Text(text.len(), styles));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
|
fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F)
|
||||||
@ -286,24 +266,15 @@ impl<'a> Collector<'a> {
|
|||||||
let prev = self.full.len();
|
let prev = self.full.len();
|
||||||
f(&mut self.full);
|
f(&mut self.full);
|
||||||
let len = self.full.len() - prev;
|
let len = self.full.len() - prev;
|
||||||
self.push_segment(Segment::Text(len, styles), false);
|
self.push_segment(Segment::Text(len, styles));
|
||||||
}
|
|
||||||
|
|
||||||
fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) {
|
|
||||||
self.full.push_str(quote);
|
|
||||||
self.push_segment(Segment::Text(quote.len(), styles), true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_item(&mut self, item: Item<'a>) {
|
fn push_item(&mut self, item: Item<'a>) {
|
||||||
self.full.push_str(item.textual());
|
self.full.push_str(item.textual());
|
||||||
self.push_segment(Segment::Item(item), false);
|
self.push_segment(Segment::Item(item));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) {
|
fn push_segment(&mut self, segment: Segment<'a>) {
|
||||||
if let Some(last) = self.full.chars().rev().find(|c| !is_default_ignorable(*c)) {
|
|
||||||
self.quoter.last(last, is_quote);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) =
|
if let (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) =
|
||||||
(self.segments.last_mut(), &segment)
|
(self.segments.last_mut(), &segment)
|
||||||
{
|
{
|
||||||
|
@ -159,7 +159,7 @@ impl Show for Packed<QuoteElem> {
|
|||||||
let block = self.block(styles);
|
let block = self.block(styles);
|
||||||
|
|
||||||
if self.quotes(styles) == Smart::Custom(true) || !block {
|
if self.quotes(styles) == Smart::Custom(true) || !block {
|
||||||
let quotes = SmartQuotes::new(
|
let quotes = SmartQuotes::get(
|
||||||
SmartQuoteElem::quotes_in(styles),
|
SmartQuoteElem::quotes_in(styles),
|
||||||
TextElem::lang_in(styles),
|
TextElem::lang_in(styles),
|
||||||
TextElem::region_in(styles),
|
TextElem::region_in(styles),
|
||||||
|
@ -97,68 +97,80 @@ impl PlainText for Packed<SmartQuoteElem> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// State machine for smart quote substitution.
|
/// A smart quote substitutor with zero lookahead.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct SmartQuoter {
|
pub struct SmartQuoter {
|
||||||
/// How many quotes have been opened.
|
/// The amount of quotes that have been opened.
|
||||||
quote_depth: usize,
|
depth: u8,
|
||||||
/// Whether an opening quote might follow.
|
/// Each bit indicates whether the quote at this nesting depth is a double.
|
||||||
expect_opening: bool,
|
/// Maximum supported depth is thus 32.
|
||||||
/// Whether the last character was numeric.
|
kinds: u32,
|
||||||
last_num: bool,
|
|
||||||
/// The previous type of quote character, if it was an opening quote.
|
|
||||||
prev_quote_type: Option<bool>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl SmartQuoter {
|
impl SmartQuoter {
|
||||||
/// Start quoting.
|
/// Start quoting.
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self { depth: 0, kinds: 0 }
|
||||||
quote_depth: 0,
|
|
||||||
expect_opening: true,
|
|
||||||
last_num: false,
|
|
||||||
prev_quote_type: None,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Process the last seen character.
|
/// Determine which smart quote to substitute given this quoter's nesting
|
||||||
pub fn last(&mut self, c: char, is_quote: bool) {
|
/// state and the character immediately preceding the quote.
|
||||||
self.expect_opening = is_exterior_to_quote(c) || is_opening_bracket(c);
|
|
||||||
self.last_num = c.is_numeric();
|
|
||||||
if !is_quote {
|
|
||||||
self.prev_quote_type = None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Process and substitute a quote.
|
|
||||||
pub fn quote<'a>(
|
pub fn quote<'a>(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
before: Option<char>,
|
||||||
quotes: &SmartQuotes<'a>,
|
quotes: &SmartQuotes<'a>,
|
||||||
double: bool,
|
double: bool,
|
||||||
peeked: Option<char>,
|
|
||||||
) -> &'a str {
|
) -> &'a str {
|
||||||
let peeked = peeked.unwrap_or(' ');
|
let opened = self.top();
|
||||||
let mut expect_opening = self.expect_opening;
|
let before = before.unwrap_or(' ');
|
||||||
if let Some(prev_double) = self.prev_quote_type.take() {
|
|
||||||
if double != prev_double {
|
// If we are after a number and haven't most recently opened a quote of
|
||||||
expect_opening = true;
|
// this kind, produce a prime. Otherwise, we prefer a closing quote.
|
||||||
}
|
if before.is_numeric() && opened != Some(double) {
|
||||||
|
return if double { "″" } else { "′" };
|
||||||
}
|
}
|
||||||
|
|
||||||
if expect_opening {
|
// If we have a single smart quote, didn't recently open a single
|
||||||
self.quote_depth += 1;
|
// quotation, and are after an alphabetic char, interpret this as an
|
||||||
self.prev_quote_type = Some(double);
|
// apostrophe.
|
||||||
quotes.open(double)
|
if !double && opened != Some(false) && before.is_alphabetic() {
|
||||||
} else if self.quote_depth > 0
|
return "’";
|
||||||
&& (peeked.is_ascii_punctuation() || is_exterior_to_quote(peeked))
|
|
||||||
{
|
|
||||||
self.quote_depth -= 1;
|
|
||||||
quotes.close(double)
|
|
||||||
} else if self.last_num {
|
|
||||||
quotes.prime(double)
|
|
||||||
} else {
|
|
||||||
quotes.fallback(double)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If the most recently opened quotation is of this kind and the
|
||||||
|
// previous char does not indicate a nested quotation, close it.
|
||||||
|
if opened == Some(double)
|
||||||
|
&& !before.is_whitespace()
|
||||||
|
&& !is_newline(before)
|
||||||
|
&& !is_opening_bracket(before)
|
||||||
|
{
|
||||||
|
self.pop();
|
||||||
|
return quotes.close(double);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, open a new quotation.
|
||||||
|
self.push(double);
|
||||||
|
quotes.open(double)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The top of our quotation stack. Returns `Some(double)` for the most
|
||||||
|
/// recently opened quote or `None` if we didn't open one.
|
||||||
|
fn top(&self) -> Option<bool> {
|
||||||
|
self.depth.checked_sub(1).map(|i| (self.kinds >> i) & 1 == 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push onto the quotation stack.
|
||||||
|
fn push(&mut self, double: bool) {
|
||||||
|
if self.depth < 32 {
|
||||||
|
self.kinds |= (double as u32) << self.depth;
|
||||||
|
self.depth += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pop from the quotation stack.
|
||||||
|
fn pop(&mut self) {
|
||||||
|
self.depth -= 1;
|
||||||
|
self.kinds &= (1 << self.depth) - 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -168,10 +180,7 @@ impl Default for SmartQuoter {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_exterior_to_quote(c: char) -> bool {
|
/// Whether the character is an opening bracket, parenthesis, or brace.
|
||||||
c.is_whitespace() || is_newline(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn is_opening_bracket(c: char) -> bool {
|
fn is_opening_bracket(c: char) -> bool {
|
||||||
matches!(c, '(' | '{' | '[')
|
matches!(c, '(' | '{' | '[')
|
||||||
}
|
}
|
||||||
@ -196,13 +205,13 @@ impl<'s> SmartQuotes<'s> {
|
|||||||
/// region as an all-uppercase ISO 3166-alpha2 code.
|
/// region as an all-uppercase ISO 3166-alpha2 code.
|
||||||
///
|
///
|
||||||
/// Currently, the supported languages are: English, Czech, Danish, German,
|
/// Currently, the supported languages are: English, Czech, Danish, German,
|
||||||
/// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin, Lithuanian,
|
/// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin,
|
||||||
/// Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, Swedish, French,
|
/// Lithuanian, Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish,
|
||||||
/// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and
|
/// Swedish, French, Hungarian, Polish, Romanian, Japanese, Traditional
|
||||||
/// Norwegian.
|
/// Chinese, Russian, and Norwegian.
|
||||||
///
|
///
|
||||||
/// For unknown languages, the English quotes are used as fallback.
|
/// For unknown languages, the English quotes are used as fallback.
|
||||||
pub fn new(
|
pub fn get(
|
||||||
quotes: &'s Smart<SmartQuoteDict>,
|
quotes: &'s Smart<SmartQuoteDict>,
|
||||||
lang: Lang,
|
lang: Lang,
|
||||||
region: Option<Region>,
|
region: Option<Region>,
|
||||||
@ -281,24 +290,6 @@ impl<'s> SmartQuotes<'s> {
|
|||||||
self.single_close
|
self.single_close
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Which character should be used as a prime.
|
|
||||||
pub fn prime(&self, double: bool) -> &'static str {
|
|
||||||
if double {
|
|
||||||
"″"
|
|
||||||
} else {
|
|
||||||
"′"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Which character should be used as a fallback quote.
|
|
||||||
pub fn fallback(&self, double: bool) -> &'static str {
|
|
||||||
if double {
|
|
||||||
"\""
|
|
||||||
} else {
|
|
||||||
"’"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An opening and closing quote.
|
/// An opening and closing quote.
|
||||||
|
BIN
tests/ref/smartquote-bracket.png
Normal file
BIN
tests/ref/smartquote-bracket.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 563 B |
BIN
tests/ref/smartquote-close-before-letter.png
Normal file
BIN
tests/ref/smartquote-close-before-letter.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 614 B |
Binary file not shown.
Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB |
BIN
tests/ref/smartquote-prime.png
Normal file
BIN
tests/ref/smartquote-prime.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 742 B |
BIN
tests/ref/smartquote-slash.png
Normal file
BIN
tests/ref/smartquote-slash.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.2 KiB |
@ -56,7 +56,28 @@ He said "I'm a big fella."
|
|||||||
|
|
||||||
--- smartquote-escape ---
|
--- smartquote-escape ---
|
||||||
// Test escape sequences.
|
// Test escape sequences.
|
||||||
The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear.
|
The 5\'11\" 'quick\' brown fox jumps over the \"lazy' dog\'s ear.
|
||||||
|
|
||||||
|
--- smartquote-slash ---
|
||||||
|
// Test that smartquotes can open before non-whitespace if not nested.
|
||||||
|
"Hello"/"World" \
|
||||||
|
'"Hello"/"World"' \
|
||||||
|
""Hello"/"World""
|
||||||
|
|
||||||
|
--- smartquote-close-before-letter ---
|
||||||
|
// Test that smartquotes can close before alphabetic letters.
|
||||||
|
Straight "A"s and "B"s
|
||||||
|
|
||||||
|
--- smartquote-prime ---
|
||||||
|
// Test that primes result after numbers when possible.
|
||||||
|
A 2" nail. \
|
||||||
|
'A 2" nail.' \
|
||||||
|
"A 2" nail."
|
||||||
|
|
||||||
|
--- smartquote-bracket ---
|
||||||
|
// Test that brackets indicate an opening quote.
|
||||||
|
"a ["b"] c" \
|
||||||
|
"a b"c"d e"
|
||||||
|
|
||||||
--- smartquote-disable ---
|
--- smartquote-disable ---
|
||||||
// Test turning smart quotes off.
|
// Test turning smart quotes off.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user