Use/allow U+2212 MINUS SIGN instead of U+002D HYPHEN-MINUS everywhere (#2318)

2025-07-06 04:02:53 +08:00 · 2023-10-09 15:30:40 +02:00 · 2023-10-09 15:30:40 +02:00 · 0804a9e25d
commit 0804a9e25d
parent df49d3f0c6
16 changed files with 182 additions and 36 deletions
--- a/crates/typst-syntax/src/ast.rs
+++ b/crates/typst-syntax/src/ast.rs
@ -454,6 +454,7 @@ impl Shorthand<'_> {
    pub const MARKUP_LIST: &'static [(&'static str, char)] = &[
        ("...", '…'),
        ("~", '\u{00A0}'),
        ("-", '\u{2212}'), // Only before a digit
        ("--", '\u{2013}'),
        ("---", '\u{2014}'),
        ("-?", '\u{00AD}'),
--- a/crates/typst-syntax/src/lexer.rs
+++ b/crates/typst-syntax/src/lexer.rs
@ -171,6 +171,7 @@ impl Lexer<'_> {
            '-' if self.s.eat_if("--") => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('-') => SyntaxKind::Shorthand,
            '-' if self.s.eat_if('?') => SyntaxKind::Shorthand,
            '-' if self.s.at(char::is_numeric) => SyntaxKind::Shorthand,
            '*' if !self.in_word() => SyntaxKind::Star,
            '_' if !self.in_word() => SyntaxKind::Underscore,
@ -480,7 +481,7 @@ impl Lexer<'_> {
            '<' if self.s.eat_if('=') => SyntaxKind::LtEq,
            '>' if self.s.eat_if('=') => SyntaxKind::GtEq,
            '+' if self.s.eat_if('=') => SyntaxKind::PlusEq,
-            '-' if self.s.eat_if('=') => SyntaxKind::HyphEq,
+            '-' | '\u{2212}' if self.s.eat_if('=') => SyntaxKind::HyphEq,
            '*' if self.s.eat_if('=') => SyntaxKind::StarEq,
            '/' if self.s.eat_if('=') => SyntaxKind::SlashEq,
            '.' if self.s.eat_if('.') => SyntaxKind::Dots,
@ -498,7 +499,7 @@ impl Lexer<'_> {
            ':' => SyntaxKind::Colon,
            '.' => SyntaxKind::Dot,
            '+' => SyntaxKind::Plus,
-            '-' => SyntaxKind::Minus,
+            '-' | '\u{2212}' => SyntaxKind::Minus,
            '*' => SyntaxKind::Star,
            '/' => SyntaxKind::Slash,
            '=' => SyntaxKind::Eq,
--- a/crates/typst/src/eval/float.rs
+++ b/crates/typst/src/eval/float.rs
@ -1,7 +1,10 @@
-use ecow::eco_format;
+use std::num::ParseFloatError;
-use super::{cast, func, scope, ty, Str};
+use ecow::{eco_format, EcoString};
 use super::{cast, func, scope, ty, Repr, Str};
 use crate::geom::Ratio;
 use crate::util::fmt::{format_float, MINUS_SIGN};
 /// A floating-point number.
 ///
@ -47,6 +50,12 @@ impl f64 {
    }
 }
 impl Repr for f64 {
    /// Produces the textual representation of this float by delegating to
    /// `format_float` with no precision and an empty suffix.
    fn repr(&self) -> EcoString {
        let value = *self;
        format_float(value, None, "")
    }
 }
 /// A value that can be cast to a float.
 pub struct ToFloat(f64);
@ -55,6 +64,13 @@ cast! {
    v: bool => Self(v as i64 as f64),
    v: i64 => Self(v as f64),
    v: Ratio => Self(v.get()),
-    v: Str => Self(v.parse().map_err(|_| eco_format!("invalid float: {}", v))?),
+    v: Str => Self(
        parse_float(v.clone().into())
            .map_err(|_| eco_format!("invalid float: {}", v))?
    ),
    v: f64 => Self(v),
 }
 /// Parses a float from `s`, treating every occurrence of `MINUS_SIGN` as an
 /// ASCII `-` before handing the text to the standard float parser.
 ///
 /// The replacement is applied to the whole string, not only to a leading
 /// sign; the rewritten text is still validated by `str::parse`.
 fn parse_float(s: EcoString) -> Result<f64, ParseFloatError> {
    let ascii = s.replace(MINUS_SIGN, "-");
    ascii.parse::<f64>()
 }
--- a/crates/typst/src/eval/int.rs
+++ b/crates/typst/src/eval/int.rs
@ -1,5 +1,6 @@
-use std::num::{NonZeroI64, NonZeroIsize, NonZeroU64, NonZeroUsize};
+use std::num::{NonZeroI64, NonZeroIsize, NonZeroU64, NonZeroUsize, ParseIntError};
 use crate::util::fmt::{format_int_with_base, MINUS_SIGN};
 use ecow::{eco_format, EcoString};
 use super::{cast, func, scope, ty, Repr, Str, Value};
@ -53,13 +54,7 @@ impl i64 {
 impl Repr for i64 {
    fn repr(&self) -> EcoString {
-        eco_format!("{self}")
+        format_int_with_base(*self, 10)
    }
 }
 impl Repr for f64 {
    fn repr(&self) -> EcoString {
        eco_format!("{self}")
    }
 }
@ -70,10 +65,22 @@ cast! {
    ToInt,
    v: bool => Self(v as i64),
    v: f64 => Self(v as i64),
-    v: Str => Self(v.parse().map_err(|_| eco_format!("invalid integer: {}", v))?),
+    v: Str => Self(parse_int(&v).map_err(|_| eco_format!("invalid integer: {}", v))?),
    v: i64 => Self(v),
 }
 /// Parses a base-10 integer from `s`, accepting either an ASCII `-` or the
 /// Unicode minus sign (`MINUS_SIGN`) as the leading sign.
 ///
 /// At most one sign is accepted. Inputs that carry a second sign after the
 /// first one (for example `--5`, `-+5` or `MINUS_SIGN` followed by `-5`) are
 /// rejected instead of being silently folded into a value.
 ///
 /// # Errors
 /// Returns the `ParseIntError` produced by `str::parse` when the text is
 /// empty, contains a non-digit, or does not fit into an `i64`.
 fn parse_int(s: &str) -> Result<i64, ParseIntError> {
    match s.strip_prefix(MINUS_SIGN) {
        // Normalize the Unicode minus to ASCII and let the standard parser
        // see the sign itself: it then refuses a second sign and also
        // handles `i64::MIN`, whose magnitude does not fit into an `i64`.
        Some(magnitude) => format!("-{magnitude}").parse(),
        // ASCII-signed and unsigned input is already in the standard form.
        None => s.parse(),
    }
 }
 macro_rules! signed_int {
    ($($ty:ty)*) => {
        $(cast! {
--- a/crates/typst/src/eval/str.rs
+++ b/crates/typst/src/eval/str.rs
@ -15,7 +15,7 @@ use crate::diag::{bail, At, SourceResult, StrResult};
 use crate::geom::Align;
 use crate::model::Label;
 use crate::syntax::{Span, Spanned};
-use crate::util::fmt::format_int_with_base;
+use crate::util::fmt::{format_float, format_int_with_base};
 /// Create a new [`Str`] from a format string.
 #[macro_export]
@ -610,7 +610,7 @@ pub enum ToStr {
 cast! {
    ToStr,
    v: i64 => Self::Int(v),
-    v: f64 => Self::Str(format_str!("{}", v)),
+    v: f64 => Self::Str(format_float(v, None, "").into()),
    v: Version => Self::Str(format_str!("{}", v)),
    v: Bytes => Self::Str(
        std::str::from_utf8(&v)
@ -970,13 +970,13 @@ mod tests {
        );
        assert_eq!(
            &format_int_with_base(i64::MIN, 2),
-            "-1000000000000000000000000000000000000000000000000000000000000000"
+            "\u{2212}1000000000000000000000000000000000000000000000000000000000000000"
        );
        assert_eq!(&format_int_with_base(i64::MAX, 10), "9223372036854775807");
-        assert_eq!(&format_int_with_base(i64::MIN, 10), "-9223372036854775808");
+        assert_eq!(&format_int_with_base(i64::MIN, 10), "\u{2212}9223372036854775808");
        assert_eq!(&format_int_with_base(i64::MAX, 16), "7fffffffffffffff");
-        assert_eq!(&format_int_with_base(i64::MIN, 16), "-8000000000000000");
+        assert_eq!(&format_int_with_base(i64::MIN, 16), "\u{2212}8000000000000000");
        assert_eq!(&format_int_with_base(i64::MAX, 36), "1y2p0ij32e8e7");
-        assert_eq!(&format_int_with_base(i64::MIN, 36), "-1y2p0ij32e8e8");
+        assert_eq!(&format_int_with_base(i64::MIN, 36), "\u{2212}1y2p0ij32e8e8");
    }
 }
--- a/crates/typst/src/eval/value.rs
+++ b/crates/typst/src/eval/value.rs
@ -21,6 +21,7 @@ use crate::eval::Datetime;
 use crate::geom::{Abs, Angle, Color, Em, Fr, Gradient, Length, Ratio, Rel};
 use crate::model::{Label, Styles};
 use crate::syntax::{ast, Span};
 use crate::util::fmt::{format_float, format_int_with_base};
 /// A computational value.
 #[derive(Debug, Default, Clone)]
@ -198,8 +199,8 @@ impl Value {
    pub fn display(self) -> Content {
        match self {
            Self::None => Content::empty(),
-            Self::Int(v) => item!(text)(eco_format!("{v}")),
+            Self::Int(v) => item!(text)(format_int_with_base(v, 10)),
-            Self::Float(v) => item!(text)(eco_format!("{v}")),
+            Self::Float(v) => item!(text)(format_float(v, None, "")),
            Self::Str(v) => item!(text)(v.into()),
            Self::Version(v) => item!(text)(eco_format!("{v}")),
            Self::Symbol(v) => item!(text)(v.get().into()),
--- a/crates/typst/src/util/fmt.rs
+++ b/crates/typst/src/util/fmt.rs
@ -1,19 +1,16 @@
 use ecow::{eco_format, EcoString};
 pub const MINUS_SIGN: &str = "\u{2212}";
 /// Format an integer in a base.
 pub fn format_int_with_base(mut n: i64, base: i64) -> EcoString {
    if n == 0 {
        return "0".into();
    }
-    // In Rust, `format!("{:x}", -14i64)` is not `-e` but `fffffffffffffff2`.
+    // The largest output is `to_base(i64::MIN, 2)`, which is 64 bytes long,
-    // So we can only use the built-in for decimal, not bin/oct/hex.
+    // plus the length of the minus sign.
-    if base == 10 {
+    const SIZE: usize = 64 + MINUS_SIGN.len();
        return eco_format!("{n}");
    }
    // The largest output is `to_base(i64::MIN, 2)`, which is 65 chars long.
    const SIZE: usize = 65;
    let mut digits = [b'\0'; SIZE];
    let mut i = SIZE;
@ -32,8 +29,9 @@ pub fn format_int_with_base(mut n: i64, base: i64) -> EcoString {
    }
    if negative {
-        i -= 1;
+        let prev = i;
-        digits[i] = b'-';
+        i -= MINUS_SIGN.len();
        digits[i..prev].copy_from_slice(MINUS_SIGN.as_bytes());
    }
    std::str::from_utf8(&digits[i..]).unwrap_or_default().into()
@ -46,7 +44,13 @@ pub fn format_float(mut value: f64, precision: Option<u8>, suffix: &str) -> EcoS
        let offset = 10_f64.powi(p as i32);
        value = (value * offset).round() / offset;
    }
    if value.is_nan() {
        "NaN".into()
    } else if value.is_sign_negative() {
        eco_format!("{}{}{}", MINUS_SIGN, value.abs(), suffix)
    } else {
        eco_format!("{}{}", value, suffix)
    }
 }
 /// Format pieces separated with commas and a final "and" or "or".
--- a/tests/ref/compiler/array.png
+++ b/tests/ref/compiler/array.png
--- a/tests/ref/compiler/repr-color-gradient.png
+++ b/tests/ref/compiler/repr-color-gradient.png
--- a/tests/ref/layout/grid-2.png
+++ b/tests/ref/layout/grid-2.png
--- a/tests/ref/text/edge.png
+++ b/tests/ref/text/edge.png
--- a/tests/ref/text/numbers.png
+++ b/tests/ref/text/numbers.png
--- a/tests/typ/compiler/methods.typ
+++ b/tests/typ/compiler/methods.typ
@ -76,7 +76,7 @@
 #test((5em + 6in).abs.inches(), 6.0)
 ---
-// Error: 2-21 cannot convert a length with non-zero em units (`-6pt + 10.5em`) to pt
+// Error: 2-21 cannot convert a length with non-zero em units (`−6pt + 10.5em`) to pt
 // Hint: 2-21 use `length.abs.pt()` instead to ignore its em component
 #(10.5em - 6pt).pt()
@ -86,7 +86,7 @@
 #(3em).cm()
 ---
-// Error: 2-20 cannot convert a length with non-zero em units (`-226.77pt + 93em`) to mm
+// Error: 2-20 cannot convert a length with non-zero em units (`−226.77pt + 93em`) to mm
 // Hint: 2-20 use `length.abs.mm()` instead to ignore its em component
 #(93em - 80mm).mm()
--- a/tests/typ/compute/calc.typ
+++ b/tests/typ/compute/calc.typ
@ -7,10 +7,16 @@
 #test(int(true), 1)
 #test(int(10), 10)
 #test(int("150"), 150)
 #test(int("-834"), -834)
 #test(int("\u{2212}79"), -79)
 #test(int(10 / 3), 3)
 #test(float(10), 10.0)
 #test(float(50% * 30%), 0.15)
 #test(float("31.4e-1"), 3.14)
 #test(float("31.4e\u{2212}1"), 3.14)
 #test(float("3.1415"), 3.1415)
 #test(float("-7654.321"), -7654.321)
 #test(float("\u{2212}7654.321"), -7654.321)
 #test(type(float(10)), float)
 ---
--- a/tests/typ/compute/construct.typ
+++ b/tests/typ/compute/construct.typ
@ -162,7 +162,7 @@
 // Test conversion to string.
 #test(str(123), "123")
 #test(str(123, base: 3), "11120")
-#test(str(-123, base: 16), "-7b")
+#test(str(-123, base: 16), "−7b")
 #test(str(9223372036854775807, base: 36), "1y2p0ij32e8e7")
 #test(str(50.14), "50.14")
 #test(str(10 / 3).len() > 10, true)
--- a/tests/typ/text/numbers.typ
+++ b/tests/typ/text/numbers.typ
@ -0,0 +1,110 @@
 // Test how numbers are displayed.
 ---
 // Test numbers in text mode.
 12 \
 12.0 \
 3.14 \
 1234567890 \
 0123456789 \
 0 \
 0.0 \
 +0 \
 +0.0 \
 -0 \
 -0.0 \
 -1 \
 -3.14 \
 -9876543210 \
 -0987654321 \
 ٣٫١٤ \
 -٣٫١٤ \
 -¾ \
 #text(fractions: true)[-3/2] \
 2022 - 2023 \
 2022 -- 2023 \
 2022--2023 \
 2022-2023 \
 ٢٠٢٢ - ٢٠٢٣ \
 ٢٠٢٢ -- ٢٠٢٣ \
 ٢٠٢٢--٢٠٢٣ \
 ٢٠٢٢-٢٠٢٣ \
 -500 -- -400
 ---
 // Test integers.
 #12 \
 #1234567890 \
 #0123456789 \
 #0 \
 #(-0) \
 #(-1) \
 #(-9876543210) \
 #(-0987654321) \
 #(4 - 8)
 ---
 // Test floats.
 #12.0 \
 #3.14 \
 #1234567890.0 \
 #0123456789.0 \
 #0.0 \
 #(-0.0) \
 #(-1.0) \
 #(-9876543210.0) \
 #(-0987654321.0) \
 #(-3.14) \
 #(4.0 - 8.0)
 ---
 // Test the `str` function with integers.
 #str(12) \
 #str(1234567890) \
 #str(0123456789) \
 #str(0) \
 #str(-0) \
 #str(-1) \
 #str(-9876543210) \
 #str(-0987654321) \
 #str(4 - 8)
 ---
 // Test the `str` function with floats.
 #str(12.0) \
 #str(3.14) \
 #str(1234567890.0) \
 #str(0123456789.0) \
 #str(0.0) \
 #str(-0.0) \
 #str(-1.0) \
 #str(-9876543210.0) \
 #str(-0987654321.0) \
 #str(-3.14) \
 #str(4.0 - 8.0)
 ---
 // Test the `repr` function with integers.
 #repr(12) \
 #repr(1234567890) \
 #repr(0123456789) \
 #repr(0) \
 #repr(-0) \
 #repr(-1) \
 #repr(-9876543210) \
 #repr(-0987654321) \
 #repr(4 - 8)
 ---
 // Test the `repr` function with floats.
 #repr(12.0) \
 #repr(3.14) \
 #repr(1234567890.0) \
 #repr(0123456789.0) \
 #repr(0.0) \
 #repr(-0.0) \
 #repr(-1.0) \
 #repr(-9876543210.0) \
 #repr(-0987654321.0) \
 #repr(-3.14) \
 #repr(4.0 - 8.0)