diff --git a/Cargo.lock b/Cargo.lock index 299e3a582..a046e53c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3117,6 +3117,7 @@ dependencies = [ "tracing", "unicode-ident", "unicode-math-class", + "unicode-script", "unicode-segmentation", "unscanny", ] diff --git a/assets/fonts/NotoSerifCJKsc-Bold.otf b/assets/fonts/NotoSerifCJKsc-Bold.otf new file mode 100644 index 000000000..c291490e1 Binary files /dev/null and b/assets/fonts/NotoSerifCJKsc-Bold.otf differ diff --git a/assets/fonts/NotoSerifCJKtc-Bold.otf b/assets/fonts/NotoSerifCJKtc-Bold.otf new file mode 100644 index 000000000..73a2135ec Binary files /dev/null and b/assets/fonts/NotoSerifCJKtc-Bold.otf differ diff --git a/crates/typst-syntax/Cargo.toml b/crates/typst-syntax/Cargo.toml index 1254e6631..681189bd3 100644 --- a/crates/typst-syntax/Cargo.toml +++ b/crates/typst-syntax/Cargo.toml @@ -23,5 +23,6 @@ serde = { workspace = true } tracing = { workspace = true } unicode-ident = { workspace = true } unicode-math-class = { workspace = true } +unicode-script = { workspace = true } unicode-segmentation = { workspace = true } unscanny = { workspace = true } diff --git a/crates/typst-syntax/src/lexer.rs b/crates/typst-syntax/src/lexer.rs index c702551ce..ffe531453 100644 --- a/crates/typst-syntax/src/lexer.rs +++ b/crates/typst-syntax/src/lexer.rs @@ -1,5 +1,6 @@ use ecow::{eco_format, EcoString}; use unicode_ident::{is_xid_continue, is_xid_start}; +use unicode_script::{Script, UnicodeScript}; use unicode_segmentation::UnicodeSegmentation; use unscanny::Scanner; @@ -343,10 +344,18 @@ impl Lexer<'_> { } fn in_word(&self) -> bool { - let alphanum = |c: Option<char>| c.map_or(false, |c| c.is_alphanumeric()); + let wordy = |c: Option<char>| { + c.map_or(false, |c| { + c.is_alphanumeric() + && !matches!( + c.script(), + Script::Han | Script::Hiragana | Script::Katakana + ) + }) + }; let prev = self.s.scout(-2); let next = self.s.peek(); - alphanum(prev) && alphanum(next) + wordy(prev) && wordy(next) } fn space_or_end(&self) -> 
bool { diff --git a/tests/ref/text/emphasis.png b/tests/ref/text/emphasis.png index 333f7006e..c19f6ebb0 100644 Binary files a/tests/ref/text/emphasis.png and b/tests/ref/text/emphasis.png differ diff --git a/tests/ref/text/lang-with-region.png b/tests/ref/text/lang-with-region.png index 097f105d3..c7753104a 100644 Binary files a/tests/ref/text/lang-with-region.png and b/tests/ref/text/lang-with-region.png differ diff --git a/tests/typ/text/emphasis.typ b/tests/typ/text/emphasis.typ index fd04c8e7f..93913dcfe 100644 --- a/tests/typ/text/emphasis.typ +++ b/tests/typ/text/emphasis.typ @@ -7,6 +7,13 @@ _Emphasized and *strong* words!_ // Inside of a word it's a normal underscore or star. hello_world Nutzer*innen +// CJK characters will not need spaces. +中文一般使用*粗体*或者_楷体_来表示强调。 + +日本語では、*太字*や_斜体_を使って強調します。 + +中文中混有*Strong*和_Empasis_。 + // Can contain paragraph in nested content block. _Still #[