Smart quotes
Co-Authored-By: Martin Haug <mhaug@live.de>
@ -3,12 +3,14 @@
|
|||||||
mod deco;
|
mod deco;
|
||||||
mod link;
|
mod link;
|
||||||
mod par;
|
mod par;
|
||||||
|
mod quotes;
|
||||||
mod raw;
|
mod raw;
|
||||||
mod shaping;
|
mod shaping;
|
||||||
|
|
||||||
pub use deco::*;
|
pub use deco::*;
|
||||||
pub use link::*;
|
pub use link::*;
|
||||||
pub use par::*;
|
pub use par::*;
|
||||||
|
pub use quotes::*;
|
||||||
pub use raw::*;
|
pub use raw::*;
|
||||||
pub use shaping::*;
|
pub use shaping::*;
|
||||||
|
|
||||||
@ -72,6 +74,8 @@ impl TextNode {
|
|||||||
/// will be hyphenated if and only if justification is enabled.
|
/// will be hyphenated if and only if justification is enabled.
|
||||||
#[property(resolve)]
|
#[property(resolve)]
|
||||||
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
|
pub const HYPHENATE: Smart<Hyphenate> = Smart::Auto;
|
||||||
|
/// Whether to apply smart quotes.
|
||||||
|
pub const SMART_QUOTES: bool = true;
|
||||||
|
|
||||||
/// Whether to apply kerning ("kern").
|
/// Whether to apply kerning ("kern").
|
||||||
pub const KERNING: bool = true;
|
pub const KERNING: bool = true;
|
||||||
|
@ -4,7 +4,7 @@ use unicode_bidi::{BidiInfo, Level};
|
|||||||
use unicode_script::{Script, UnicodeScript};
|
use unicode_script::{Script, UnicodeScript};
|
||||||
use xi_unicode::LineBreakIterator;
|
use xi_unicode::LineBreakIterator;
|
||||||
|
|
||||||
use super::{shape, Lang, ShapedText, TextNode};
|
use super::{shape, Lang, Quoter, Quotes, ShapedText, TextNode};
|
||||||
use crate::font::FontStore;
|
use crate::font::FontStore;
|
||||||
use crate::library::layout::Spacing;
|
use crate::library::layout::Spacing;
|
||||||
use crate::library::prelude::*;
|
use crate::library::prelude::*;
|
||||||
@ -386,9 +386,11 @@ fn collect<'a>(
|
|||||||
styles: &'a StyleChain<'a>,
|
styles: &'a StyleChain<'a>,
|
||||||
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
|
) -> (String, Vec<(Segment<'a>, StyleChain<'a>)>) {
|
||||||
let mut full = String::new();
|
let mut full = String::new();
|
||||||
|
let mut quoter = Quoter::new();
|
||||||
let mut segments = vec![];
|
let mut segments = vec![];
|
||||||
|
let mut iter = par.0.iter().peekable();
|
||||||
|
|
||||||
for (child, map) in par.0.iter() {
|
while let Some((child, map)) = iter.next() {
|
||||||
let styles = map.chain(&styles);
|
let styles = map.chain(&styles);
|
||||||
let segment = match child {
|
let segment = match child {
|
||||||
ParChild::Text(text) => {
|
ParChild::Text(text) => {
|
||||||
@ -402,7 +404,25 @@ fn collect<'a>(
|
|||||||
}
|
}
|
||||||
ParChild::Quote(double) => {
|
ParChild::Quote(double) => {
|
||||||
let prev = full.len();
|
let prev = full.len();
|
||||||
full.push(if *double { '"' } else { '\'' });
|
if styles.get(TextNode::SMART_QUOTES) {
|
||||||
|
// TODO: Also get region.
|
||||||
|
let lang = styles.get(TextNode::LANG);
|
||||||
|
let quotes = lang
|
||||||
|
.as_ref()
|
||||||
|
.map(|lang| Quotes::from_lang(lang.as_str(), ""))
|
||||||
|
.unwrap_or_default();
|
||||||
|
|
||||||
|
let peeked = iter.peek().and_then(|(child, _)| match child {
|
||||||
|
ParChild::Text(text) => text.chars().next(),
|
||||||
|
ParChild::Quote(_) => Some('"'),
|
||||||
|
ParChild::Spacing(_) => Some(SPACING_REPLACE),
|
||||||
|
ParChild::Node(_) => Some(NODE_REPLACE),
|
||||||
|
});
|
||||||
|
|
||||||
|
full.push_str(quoter.quote(&quotes, *double, peeked));
|
||||||
|
} else {
|
||||||
|
full.push(if *double { '"' } else { '\'' });
|
||||||
|
}
|
||||||
Segment::Text(full.len() - prev)
|
Segment::Text(full.len() - prev)
|
||||||
}
|
}
|
||||||
ParChild::Spacing(spacing) => {
|
ParChild::Spacing(spacing) => {
|
||||||
@ -415,6 +435,10 @@ fn collect<'a>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if let Some(last) = full.chars().last() {
|
||||||
|
quoter.last(last);
|
||||||
|
}
|
||||||
|
|
||||||
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
|
if let (Some((Segment::Text(last_len), last_styles)), Segment::Text(len)) =
|
||||||
(segments.last_mut(), segment)
|
(segments.last_mut(), segment)
|
||||||
{
|
{
|
||||||
|
146
src/library/text/quotes.rs
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
use crate::parse::is_newline;
|
||||||
|
|
||||||
|
/// State machine for smart quote subtitution.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Quoter {
|
||||||
|
/// How many quotes have been opened.
|
||||||
|
quote_depth: usize,
|
||||||
|
/// Whether an opening quote might follow.
|
||||||
|
expect_opening: bool,
|
||||||
|
/// Whether the last character was numeric.
|
||||||
|
last_num: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Quoter {
|
||||||
|
/// Start quoting.
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
quote_depth: 0,
|
||||||
|
expect_opening: true,
|
||||||
|
last_num: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process the last seen character.
|
||||||
|
pub fn last(&mut self, c: char) {
|
||||||
|
self.expect_opening = is_ignorable(c) || is_opening_bracket(c);
|
||||||
|
self.last_num = c.is_numeric();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Process and substitute a quote.
|
||||||
|
pub fn quote<'a>(
|
||||||
|
&mut self,
|
||||||
|
quotes: &Quotes<'a>,
|
||||||
|
double: bool,
|
||||||
|
peeked: Option<char>,
|
||||||
|
) -> &'a str {
|
||||||
|
let peeked = peeked.unwrap_or(' ');
|
||||||
|
if self.expect_opening {
|
||||||
|
self.quote_depth += 1;
|
||||||
|
quotes.open(double)
|
||||||
|
} else if self.quote_depth > 0
|
||||||
|
&& (peeked.is_ascii_punctuation() || is_ignorable(peeked))
|
||||||
|
{
|
||||||
|
self.quote_depth -= 1;
|
||||||
|
quotes.close(double)
|
||||||
|
} else if self.last_num {
|
||||||
|
quotes.prime(double)
|
||||||
|
} else {
|
||||||
|
quotes.fallback(double)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Quoter {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_ignorable(c: char) -> bool {
|
||||||
|
c.is_whitespace() || is_newline(c)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether `c` is an opening parenthesis, curly brace, or square bracket.
fn is_opening_bracket(c: char) -> bool {
    ['(', '{', '['].contains(&c)
}
|
||||||
|
|
||||||
|
/// Decides which quotes to substitute smart quotes with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Quotes<'s> {
    /// The opening single quote.
    pub single_open: &'s str,
    /// The closing single quote.
    pub single_close: &'s str,
    /// The opening double quote.
    pub double_open: &'s str,
    /// The closing double quote.
    pub double_close: &'s str,
}

impl<'s> Quotes<'s> {
    /// Create a new `Quotes` struct with the defaults for a language and
    /// region.
    ///
    /// The language should be specified as an all-lowercase ISO 639-1 code, the
    /// region as an all-uppercase ISO 3166-alpha2 code. Pass an empty string
    /// when the region is unknown.
    ///
    /// Languages with dedicated quotes are: Czech, Danish, German,
    /// Swiss / Liechtensteinian German, Estonian, Icelandic, Lithuanian,
    /// Latvian, Slovak, Slovenian, French, Bosnian, Finnish, Swedish,
    /// Hungarian, Polish, Romanian, Russian, and Norwegian (`no` and `nn`).
    ///
    /// For all other languages, the English quotes are used.
    pub fn from_lang(language: &str, region: &str) -> Self {
        let (single_open, single_close, double_open, double_close) = match language {
            // The region-specific arm must come before the generic "de" arm.
            "de" if matches!(region, "CH" | "LI") => ("‹", "›", "«", "»"),
            "cs" | "da" | "de" | "et" | "is" | "lt" | "lv" | "sk" | "sl" => {
                ("‚", "‘", "„", "“")
            }
            // French guillemets are separated from the text by no-break spaces.
            "fr" => ("‹\u{00A0}", "\u{00A0}›", "«\u{00A0}", "\u{00A0}»"),
            "bs" | "fi" | "sv" => ("’", "’", "”", "”"),
            "hu" | "pl" | "ro" => ("’", "’", "„", "”"),
            "ru" | "no" | "nn" => ("’", "’", "«", "»"),
            _ => return Self::default(),
        };

        Self {
            single_open,
            single_close,
            double_open,
            double_close,
        }
    }

    /// The opening quote.
    fn open(&self, double: bool) -> &'s str {
        if double { self.double_open } else { self.single_open }
    }

    /// The closing quote.
    fn close(&self, double: bool) -> &'s str {
        if double { self.double_close } else { self.single_close }
    }

    /// Which character should be used as a prime. This is the same for every
    /// language.
    fn prime(&self, double: bool) -> &'static str {
        if double { "″" } else { "′" }
    }

    /// Which character should be used as a fallback quote: a straight double
    /// quote or a typographic apostrophe. This is the same for every language.
    fn fallback(&self, double: bool) -> &'static str {
        if double { "\"" } else { "’" }
    }
}

impl Default for Quotes<'_> {
    /// Returns the English quotes as default.
    fn default() -> Self {
        Self {
            single_open: "‘",
            single_close: "’",
            double_open: "“",
            double_close: "”",
        }
    }
}
|
@ -100,6 +100,7 @@ impl Show for RawNode {
|
|||||||
let mut map = StyleMap::new();
|
let mut map = StyleMap::new();
|
||||||
map.set(TextNode::OVERHANG, false);
|
map.set(TextNode::OVERHANG, false);
|
||||||
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
|
map.set(TextNode::HYPHENATE, Smart::Custom(Hyphenate(false)));
|
||||||
|
map.set(TextNode::SMART_QUOTES, false);
|
||||||
|
|
||||||
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
|
if let Smart::Custom(family) = styles.get(Self::FAMILY) {
|
||||||
map.set_family(family.clone(), styles);
|
map.set_family(family.clone(), styles);
|
||||||
|
Before Width: | Height: | Size: 801 B After Width: | Height: | Size: 520 B |
Before Width: | Height: | Size: 47 KiB After Width: | Height: | Size: 47 KiB |
Before Width: | Height: | Size: 106 KiB After Width: | Height: | Size: 120 KiB |
Before Width: | Height: | Size: 58 KiB After Width: | Height: | Size: 57 KiB |
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
BIN
tests/ref/text/quotes.png
Normal file
After Width: | Height: | Size: 58 KiB |
Before Width: | Height: | Size: 6.3 KiB After Width: | Height: | Size: 6.3 KiB |
@ -5,11 +5,10 @@
|
|||||||
// Don't parse closure directly in content.
|
// Don't parse closure directly in content.
|
||||||
// Ref: true
|
// Ref: true
|
||||||
|
|
||||||
#let x = "\"hi\""
|
#let x = "x"
|
||||||
|
|
||||||
// Should output `"hi" => "bye"`.
|
// Should output `x => y`.
|
||||||
#set text(overhang: false)
|
#x => y
|
||||||
#x => "bye"
|
|
||||||
|
|
||||||
---
|
---
|
||||||
// Basic closure without captures.
|
// Basic closure without captures.
|
||||||
|
@ -2,7 +2,8 @@
|
|||||||
|
|
||||||
---
|
---
|
||||||
// Escapable symbols.
|
// Escapable symbols.
|
||||||
\\ \/ \[ \] \{ \} \# \* \_ \= \~ \` \$
|
\\ \/ \[ \] \{ \} \# \* \_ \
|
||||||
|
\= \~ \` \$ \" \'
|
||||||
|
|
||||||
// No need to escape.
|
// No need to escape.
|
||||||
( ) ; < >
|
( ) ; < >
|
||||||
|
54
tests/typ/text/quotes.typ
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
// Test smart quotes.
|
||||||
|
|
||||||
|
---
|
||||||
|
#set page(width: 200pt)
|
||||||
|
|
||||||
|
// Test simple quotations in various languages.
|
||||||
|
#set text(lang: "en")
|
||||||
|
"The horse eats no cucumber salad" was the first sentence ever uttered on the 'telephone.'
|
||||||
|
|
||||||
|
#set text(lang: "de")
|
||||||
|
"Das Pferd frisst keinen Gurkensalat" war der erste jemals am 'Fernsprecher' gesagte Satz.
|
||||||
|
|
||||||
|
#set text(lang: "fr")
|
||||||
|
"Le cheval ne mange pas de salade de concombres" est la première phrase jamais prononcée au 'téléphone'.
|
||||||
|
|
||||||
|
#set text(lang: "fi")
|
||||||
|
"Hevonen ei syö kurkkusalaattia" oli ensimmäinen koskaan 'puhelimessa' lausuttu lause.
|
||||||
|
|
||||||
|
#set text(lang: "ro")
|
||||||
|
"Calul nu mănâncă salată de castraveți" a fost prima propoziție rostită vreodată la 'telefon'.
|
||||||
|
|
||||||
|
#set text(lang: "ru")
|
||||||
|
"Лошадь не ест салат из огурцов" - это была первая фраза, сказанная по 'телефону'.
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test single pair of quotes.
|
||||||
|
#set text(lang: "en")
|
||||||
|
""
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test sentences with numbers and apostrophes.
|
||||||
|
#set text(lang: "en")
|
||||||
|
The 5'11" 'quick' brown fox jumps over the "lazy" dog's ear.
|
||||||
|
|
||||||
|
He said "I'm a big fella."
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test escape sequences.
|
||||||
|
The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear.
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test turning smart quotes off.
|
||||||
|
#set text(lang: "en")
|
||||||
|
He's told some books contain questionable "example text".
|
||||||
|
|
||||||
|
#set text(smart-quotes: false)
|
||||||
|
He's told some books contain questionable "example text".
|
||||||
|
|
||||||
|
---
|
||||||
|
// Test changing properties within text.
|
||||||
|
#set text(lang: "en")
|
||||||
|
"She suddenly started speaking french: #text(lang: "fr")['Je suis une banane.']" Roman told me.
|
||||||
|
|
||||||
|
Some people's thought on this would be #text(smart-quotes: false)["strange."]
|