Support underscores in numeric literals

This commit is contained in:
+merlan #flirora 2025-02-03 13:41:48 -05:00
parent 5b3593e571
commit 124bd97319
4 changed files with 54 additions and 10 deletions

View File

@ -2,7 +2,7 @@
//!
//! The AST is rooted in the [`Markup`] node.
use std::num::NonZeroUsize;
use std::num::{NonZeroUsize, ParseIntError};
use std::ops::Deref;
use std::path::Path;
use std::str::FromStr;
@ -10,6 +10,7 @@ use std::str::FromStr;
use ecow::EcoString;
use unscanny::Scanner;
use crate::lexer::ignore_underscores;
use crate::package::PackageSpec;
use crate::{is_ident, is_newline, Span, SyntaxKind, SyntaxNode};
@ -1003,18 +1004,26 @@ impl Int<'_> {
pub fn get(self) -> i64 {
let text = self.0.text();
// The radix is chosen from the literal's prefix: `0x` is base 16, `0o` is
// base 8 and `0b` is base 2. The prefix is stripped before the digits are
// parsed; text without one of these prefixes is parsed as base 10.
if let Some(rest) = text.strip_prefix("0x") {
i64::from_str_radix(rest, 16)
int_from_str_radix_ignoring_underscores(rest, 16)
} else if let Some(rest) = text.strip_prefix("0o") {
i64::from_str_radix(rest, 8)
int_from_str_radix_ignoring_underscores(rest, 8)
} else if let Some(rest) = text.strip_prefix("0b") {
i64::from_str_radix(rest, 2)
int_from_str_radix_ignoring_underscores(rest, 2)
} else {
text.parse()
int_from_str_radix_ignoring_underscores(text, 10)
}
// A parse failure is not propagated: it collapses to `i64::default()`,
// which is 0.
.unwrap_or_default()
}
}
/// Parses `s` as an `i64` written in base `radix`, skipping any `_` digit
/// separators it contains.
///
/// The underscores are removed first and the remaining text is handed to
/// [`i64::from_str_radix`], whose error is returned unchanged.
fn int_from_str_radix_ignoring_underscores(
    s: &str,
    radix: u32,
) -> Result<i64, ParseIntError> {
    i64::from_str_radix(&ignore_underscores(s), radix)
}
node! {
/// A floating-point number: `1.2`, `10e-4`.
Float
@ -1023,7 +1032,8 @@ node! {
impl Float<'_> {
/// Get the floating-point value.
///
/// Underscore digit separators are removed from the node's text before it
/// is parsed. Text that does not parse as an `f64` yields `f64::default()`,
/// which is `0.0`.
pub fn get(self) -> f64 {
self.0.text().parse().unwrap_or_default()
let s = ignore_underscores(self.0.text());
s.parse().unwrap_or_default()
}
}

View File

@ -1,3 +1,5 @@
use std::borrow::Cow;
use ecow::{eco_format, EcoString};
use unicode_ident::{is_xid_continue, is_xid_start};
use unicode_script::{Script, UnicodeScript};
@ -825,9 +827,9 @@ impl Lexer<'_> {
// Read the first part (integer or fractional depending on `first`).
self.s.eat_while(if base == 16 {
char::is_ascii_alphanumeric
is_ascii_alphanumeric_or_underscore
} else {
char::is_ascii_digit
is_ascii_digit_or_underscore
});
// Read the fractional part if not already done.
@ -838,7 +840,7 @@ impl Lexer<'_> {
&& self.s.eat_if('.')
&& base == 10
{
self.s.eat_while(char::is_ascii_digit);
self.s.eat_while(is_ascii_digit_or_underscore);
}
// Read the exponent.
@ -855,8 +857,9 @@ impl Lexer<'_> {
let number = self.s.get(start..suffix_start);
let suffix = self.s.from(suffix_start);
let number = ignore_underscores(number);
let kind = if i64::from_str_radix(number, base).is_ok() {
let kind = if i64::from_str_radix(&number, base).is_ok() {
SyntaxKind::Int
} else if base == 10 && number.parse::<f64>().is_ok() {
SyntaxKind::Float
@ -1098,3 +1101,21 @@ fn is_valid_in_label_literal(c: char) -> bool {
/// Reports whether `id` is non-empty and consists solely of characters
/// accepted by `is_valid_in_label_literal`.
pub fn is_valid_label_literal_id(id: &str) -> bool {
    // The empty string is rejected outright.
    if id.is_empty() {
        return false;
    }
    id.chars().all(is_valid_in_label_literal)
}
/// Returns `true` for the ASCII decimal digits `0`-`9` and for `_`.
#[inline]
fn is_ascii_digit_or_underscore(c: char) -> bool {
    matches!(c, '0'..='9' | '_')
}
/// Returns `true` for ASCII letters (`a`-`z`, `A`-`Z`), ASCII decimal digits
/// (`0`-`9`) and for `_`.
#[inline]
fn is_ascii_alphanumeric_or_underscore(c: char) -> bool {
    matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z' | '_')
}
/// Returns `s` with every `_` removed.
///
/// When `s` contains no underscore the input is returned borrowed, so no
/// allocation takes place; otherwise a new owned string is built.
pub(crate) fn ignore_underscores(s: &str) -> Cow<'_, str> {
    match s.find('_') {
        None => Cow::Borrowed(s),
        Some(_) => Cow::Owned(s.replace('_', "")),
    }
}

View File

@ -115,3 +115,8 @@
#.1E-
// Error: 2-4 invalid number: 0e
#0e
--- float-digit-separators ---
// Test digit separators in float literals.
#test(1_234.567_890, 1234.56789)

View File

@ -108,3 +108,11 @@
--- number-invalid-suffix ---
// Error: 2-4 invalid number suffix: u
#1u
--- int-digit-separators ---
// Test digit separators in integer literals.
#test(123_456_789, 123456789)
#test(0b0101_1010, 90)
#test(0x1234_5678, 305419896)
#test(0o222_333_444, 38385444)