diff --git a/crates/typst/src/layout/inline/collect.rs b/crates/typst/src/layout/inline/collect.rs index 53c684d15..624eedf32 100644 --- a/crates/typst/src/layout/inline/collect.rs +++ b/crates/typst/src/layout/inline/collect.rs @@ -16,8 +16,6 @@ use crate::utils::Numeric; // paragraph's full text. const SPACING_REPLACE: &str = " "; // Space const OBJ_REPLACE: &str = "\u{FFFC}"; // Object Replacement Character -const SPACING_REPLACE_CHAR: char = ' '; -const OBJ_REPLACE_CHAR: char = '\u{FFFC}'; // Unicode BiDi control characters. const LTR_EMBEDDING: &str = "\u{202A}"; @@ -125,8 +123,8 @@ pub fn collect<'a>( consecutive: bool, ) -> SourceResult<(String, Vec>, SpanMapper)> { let mut collector = Collector::new(2 + children.len()); - let mut iter = children.iter(styles).peekable(); let mut locator = locator.split(); + let mut quoter = SmartQuoter::new(); let outer_dir = TextElem::dir_in(*styles); let first_line_indent = ParElem::first_line_indent_in(*styles); @@ -144,7 +142,7 @@ pub fn collect<'a>( collector.spans.push(1, Span::detached()); } - while let Some((child, styles)) = iter.next() { + for (child, styles) in children.iter(styles) { let prev_len = collector.full.len(); if child.is::() { @@ -191,32 +189,16 @@ pub fn collect<'a>( } else if let Some(elem) = child.to_packed::() { let double = elem.double(styles); if elem.enabled(styles) { - let quotes = SmartQuotes::new( + let quotes = SmartQuotes::get( elem.quotes(styles), TextElem::lang_in(styles), TextElem::region_in(styles), elem.alternative(styles), ); - let peeked = iter.peek().and_then(|(child, _)| { - if let Some(elem) = child.to_packed::() { - elem.text().chars().find(|c| !is_default_ignorable(*c)) - } else if child.is::() { - Some('"') - } else if child.is::() - || child.is::() - || child.is::() - // This is a temporary hack. We should rather skip these - // and peek at the next child. 
- || child.is::() - { - Some(SPACING_REPLACE_CHAR) - } else { - Some(OBJ_REPLACE_CHAR) - } - }); - - let quote = collector.quoter.quote(&quotes, double, peeked); - collector.push_quote(quote, styles); + let before = + collector.full.chars().rev().find(|&c| !is_default_ignorable(c)); + let quote = quoter.quote(before, &quotes, double); + collector.push_text(quote, styles); } else { collector.push_text(if double { "\"" } else { "'" }, styles); } @@ -261,7 +243,6 @@ struct Collector<'a> { full: String, segments: Vec<Segment<'a>>, spans: SpanMapper, - quoter: SmartQuoter, } impl<'a> Collector<'a> { @@ -270,13 +251,12 @@ impl<'a> Collector<'a> { full: String::new(), segments: Vec::with_capacity(capacity), spans: SpanMapper::new(), - quoter: SmartQuoter::new(), } } fn push_text(&mut self, text: &str, styles: StyleChain<'a>) { self.full.push_str(text); - self.push_segment(Segment::Text(text.len(), styles), false); + self.push_segment(Segment::Text(text.len(), styles)); } fn build_text<F>(&mut self, styles: StyleChain<'a>, f: F) @@ -286,24 +266,15 @@ impl<'a> Collector<'a> { let prev = self.full.len(); f(&mut self.full); let len = self.full.len() - prev; - self.push_segment(Segment::Text(len, styles), false); - } - - fn push_quote(&mut self, quote: &str, styles: StyleChain<'a>) { - self.full.push_str(quote); - self.push_segment(Segment::Text(quote.len(), styles), true); + self.push_segment(Segment::Text(len, styles)); } fn push_item(&mut self, item: Item<'a>) { self.full.push_str(item.textual()); - self.push_segment(Segment::Item(item), false); + self.push_segment(Segment::Item(item)); } - fn push_segment(&mut self, segment: Segment<'a>, is_quote: bool) { - if let Some(last) = self.full.chars().rev().find(|c| !is_default_ignorable(*c)) { - self.quoter.last(last, is_quote); - } - + fn push_segment(&mut self, segment: Segment<'a>) { if let (Some(Segment::Text(last_len, last_styles)), Segment::Text(len, styles)) = (self.segments.last_mut(), &segment) { diff --git a/crates/typst/src/model/quote.rs 
b/crates/typst/src/model/quote.rs index 65a809dca..528c0998e 100644 --- a/crates/typst/src/model/quote.rs +++ b/crates/typst/src/model/quote.rs @@ -159,7 +159,7 @@ impl Show for Packed { let block = self.block(styles); if self.quotes(styles) == Smart::Custom(true) || !block { - let quotes = SmartQuotes::new( + let quotes = SmartQuotes::get( SmartQuoteElem::quotes_in(styles), TextElem::lang_in(styles), TextElem::region_in(styles), diff --git a/crates/typst/src/text/smartquote.rs b/crates/typst/src/text/smartquote.rs index 64fecb768..02c93fd6b 100644 --- a/crates/typst/src/text/smartquote.rs +++ b/crates/typst/src/text/smartquote.rs @@ -97,68 +97,80 @@ impl PlainText for Packed { } } -/// State machine for smart quote substitution. +/// A smart quote substitutor with zero lookahead. #[derive(Debug, Clone)] pub struct SmartQuoter { - /// How many quotes have been opened. - quote_depth: usize, - /// Whether an opening quote might follow. - expect_opening: bool, - /// Whether the last character was numeric. - last_num: bool, - /// The previous type of quote character, if it was an opening quote. - prev_quote_type: Option<bool>, + /// The number of quotes that have been opened. + depth: u8, + /// Each bit indicates whether the quote at this nesting depth is a double. + /// Maximum supported depth is thus 32. + kinds: u32, } impl SmartQuoter { /// Start quoting. pub fn new() -> Self { - Self { - quote_depth: 0, - expect_opening: true, - last_num: false, - prev_quote_type: None, - } + Self { depth: 0, kinds: 0 } } - /// Process the last seen character. - pub fn last(&mut self, c: char, is_quote: bool) { - self.expect_opening = is_exterior_to_quote(c) || is_opening_bracket(c); - self.last_num = c.is_numeric(); - if !is_quote { - self.prev_quote_type = None; - } - } - - /// Process and substitute a quote. + /// Determine which smart quote to substitute given this quoter's nesting + /// state and the character immediately preceding the quote. 
pub fn quote<'a>( &mut self, + before: Option<char>, quotes: &SmartQuotes<'a>, double: bool, - peeked: Option<char>, ) -> &'a str { - let peeked = peeked.unwrap_or(' '); - let mut expect_opening = self.expect_opening; - if let Some(prev_double) = self.prev_quote_type.take() { - if double != prev_double { - expect_opening = true; - } + let opened = self.top(); + let before = before.unwrap_or(' '); + + // If we are after a number and haven't most recently opened a quote of + // this kind, produce a prime. Otherwise, we prefer a closing quote. + if before.is_numeric() && opened != Some(double) { + return if double { "″" } else { "′" }; } - if expect_opening { - self.quote_depth += 1; - self.prev_quote_type = Some(double); - quotes.open(double) - } else if self.quote_depth > 0 - && (peeked.is_ascii_punctuation() || is_exterior_to_quote(peeked)) - { - self.quote_depth -= 1; - quotes.close(double) - } else if self.last_num { - quotes.prime(double) - } else { - quotes.fallback(double) + // If we have a single smart quote, didn't recently open a single + // quotation, and are after an alphabetic char, interpret this as an + // apostrophe. + if !double && opened != Some(false) && before.is_alphabetic() { + return "’"; } + + // If the most recently opened quotation is of this kind and the + // previous char does not indicate a nested quotation, close it. + if opened == Some(double) + && !before.is_whitespace() + && !is_newline(before) + && !is_opening_bracket(before) + { + self.pop(); + return quotes.close(double); + } + + // Otherwise, open a new quotation. + self.push(double); + quotes.open(double) + } + + /// The top of our quotation stack. Returns `Some(double)` for the most + /// recently opened quote or `None` if we didn't open one. + fn top(&self) -> Option<bool> { + self.depth.checked_sub(1).map(|i| (self.kinds >> i) & 1 == 1) + } + + /// Push onto the quotation stack. 
+ fn push(&mut self, double: bool) { + if self.depth < 32 { + self.kinds |= (double as u32) << self.depth; + self.depth += 1; + } + } + + /// Pop from the quotation stack. + fn pop(&mut self) { + self.depth -= 1; + self.kinds &= (1 << self.depth) - 1; } } @@ -168,10 +180,7 @@ impl Default for SmartQuoter { } } -fn is_exterior_to_quote(c: char) -> bool { - c.is_whitespace() || is_newline(c) -} - +/// Whether the character is an opening bracket, parenthesis, or brace. fn is_opening_bracket(c: char) -> bool { matches!(c, '(' | '{' | '[') } @@ -196,13 +205,13 @@ impl<'s> SmartQuotes<'s> { /// region as an all-uppercase ISO 3166-alpha2 code. /// /// Currently, the supported languages are: English, Czech, Danish, German, - /// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin, Lithuanian, - /// Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, Swedish, French, - /// Hungarian, Polish, Romanian, Japanese, Traditional Chinese, Russian, and - /// Norwegian. + /// Swiss / Liechtensteinian German, Estonian, Icelandic, Italian, Latin, + /// Lithuanian, Latvian, Slovak, Slovenian, Spanish, Bosnian, Finnish, + /// Swedish, French, Hungarian, Polish, Romanian, Japanese, Traditional + /// Chinese, Russian, and Norwegian. /// /// For unknown languages, the English quotes are used as fallback. - pub fn new( + pub fn get( quotes: &'s Smart, lang: Lang, region: Option, @@ -281,24 +290,6 @@ impl<'s> SmartQuotes<'s> { self.single_close } } - - /// Which character should be used as a prime. - pub fn prime(&self, double: bool) -> &'static str { - if double { - "″" - } else { - "′" - } - } - - /// Which character should be used as a fallback quote. - pub fn fallback(&self, double: bool) -> &'static str { - if double { - "\"" - } else { - "’" - } - } } /// An opening and closing quote. 
diff --git a/tests/ref/smartquote-bracket.png b/tests/ref/smartquote-bracket.png new file mode 100644 index 000000000..7efcccf8a Binary files /dev/null and b/tests/ref/smartquote-bracket.png differ diff --git a/tests/ref/smartquote-close-before-letter.png b/tests/ref/smartquote-close-before-letter.png new file mode 100644 index 000000000..8061c12a6 Binary files /dev/null and b/tests/ref/smartquote-close-before-letter.png differ diff --git a/tests/ref/smartquote-escape.png b/tests/ref/smartquote-escape.png index 45d8f6027..ea4aef9c4 100644 Binary files a/tests/ref/smartquote-escape.png and b/tests/ref/smartquote-escape.png differ diff --git a/tests/ref/smartquote-prime.png b/tests/ref/smartquote-prime.png new file mode 100644 index 000000000..35c376122 Binary files /dev/null and b/tests/ref/smartquote-prime.png differ diff --git a/tests/ref/smartquote-slash.png b/tests/ref/smartquote-slash.png new file mode 100644 index 000000000..925f28e63 Binary files /dev/null and b/tests/ref/smartquote-slash.png differ diff --git a/tests/suite/text/smartquote.typ b/tests/suite/text/smartquote.typ index 4c27be074..a69ad25c9 100644 --- a/tests/suite/text/smartquote.typ +++ b/tests/suite/text/smartquote.typ @@ -56,7 +56,28 @@ He said "I'm a big fella." --- smartquote-escape --- // Test escape sequences. -The 5\'11\" 'quick\' brown fox jumps over the \"lazy" dog\'s ear. +The 5\'11\" 'quick\' brown fox jumps over the \"lazy' dog\'s ear. + +--- smartquote-slash --- +// Test that smartquotes can open before non-whitespace if not nested. +"Hello"/"World" \ +'"Hello"/"World"' \ +""Hello"/"World"" + +--- smartquote-close-before-letter --- +// Test that smartquotes can close before alphabetic letters. +Straight "A"s and "B"s + +--- smartquote-prime --- +// Test that primes result after numbers when possible. +A 2" nail. \ +'A 2" nail.' \ +"A 2" nail." + +--- smartquote-bracket --- +// Test that brackets indicate an opening quote. 
+"a ["b"] c" \ +"a b"c"d e" --- smartquote-disable --- // Test turning smart quotes off.