refactor number lexing to improve error messages

This commit is contained in:
Ian Wrzesinski 2025-02-27 09:40:52 -05:00
parent cfb3b1a270
commit e66fc3cc2e
3 changed files with 148 additions and 67 deletions

View File

@ -807,44 +807,47 @@ impl Lexer<'_> {
} }
} }
fn number(&mut self, mut start: usize, c: char) -> SyntaxKind { fn number(&mut self, mut start: usize, first_c: char) -> SyntaxKind {
// Handle alternative integer bases. // Handle alternative integer bases.
let mut base = 10; let mut base = 10;
if c == '0' { let mut is_float = false; // `true` implies `base == 10`
if self.s.eat_if('b') { match first_c {
base = 2; '0' if self.s.eat_if('b') => base = 2,
} else if self.s.eat_if('o') { '0' if self.s.eat_if('o') => base = 8,
base = 8; '0' if self.s.eat_if('x') => base = 16,
} else if self.s.eat_if('x') { '.' => is_float = true,
base = 16; _ => {}
} }
if base != 10 { if base != 10 {
start = self.s.cursor(); start = self.s.cursor();
}
} }
// Read the first part (integer or fractional depending on the first character). // Read the first part (integer or fractional depending on the first character).
self.s.eat_while(if base == 16 { if base == 16 {
char::is_ascii_alphanumeric self.s.eat_while(char::is_ascii_alphanumeric);
} else { } else {
char::is_ascii_digit
});
// Read the fractional part if not already done.
// Make sure not to confuse a range for the decimal separator.
if c != '.'
&& !self.s.at("..")
&& !self.s.scout(1).is_some_and(is_id_start)
&& self.s.eat_if('.')
&& base == 10
{
self.s.eat_while(char::is_ascii_digit); self.s.eat_while(char::is_ascii_digit);
} }
// Read the exponent. // Maybe read a floating point number
if !self.s.at("em") && self.s.eat_if(['e', 'E']) && base == 10 { if base == 10 {
self.s.eat_if(['+', '-']); // Read the fractional part if not already done.
self.s.eat_while(char::is_ascii_digit); // Make sure not to confuse a range for the decimal separator.
if first_c != '.'
&& !self.s.at("..")
&& !self.s.scout(1).is_some_and(is_id_start)
&& self.s.eat_if('.')
{
is_float = true;
self.s.eat_while(char::is_ascii_digit);
}
// Read the exponent.
if !self.s.at("em") && self.s.eat_if(['e', 'E']) {
is_float = true;
self.s.eat_if(['+', '-']);
self.s.eat_while(char::is_ascii_digit);
}
} }
// Read the suffix. // Read the suffix.
@ -856,37 +859,95 @@ impl Lexer<'_> {
let number = self.s.get(start..suffix_start); let number = self.s.get(start..suffix_start);
let suffix = self.s.from(suffix_start); let suffix = self.s.from(suffix_start);
let kind = if i64::from_str_radix(number, base).is_ok() { let number_result = if is_float && number.parse::<f64>().is_err() {
SyntaxKind::Int // The only invalid case should be when a float lacks digits after
} else if base == 10 && number.parse::<f64>().is_ok() { // the exponent: e.g. `1.2e` or `2.3E-`.
SyntaxKind::Float Err(eco_format!("invalid floating point number: {number}"))
} else if base != 10 {
match i64::from_str_radix(number, base) {
Ok(int) => Ok(Some(int)), // Used for better errors below.
Err(_) => {
let (name, prefix) = match base {
2 => ("binary", "0b"),
8 => ("octal", "0o"),
16 => ("hexadecimal", "0x"),
_ => unreachable!(),
};
Err(eco_format!("invalid {name} number: {prefix}{number}"))
}
}
} else { } else {
return self.error(match base { Ok(None)
2 => eco_format!("invalid binary number: 0b{}", number),
8 => eco_format!("invalid octal number: 0o{}", number),
16 => eco_format!("invalid hexadecimal number: 0x{}", number),
_ => eco_format!("invalid number: {}", number),
});
}; };
if suffix.is_empty() { let maybe_suffix_result = match suffix {
return kind; "" => None,
} "pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" => Some(Ok(())),
_ => {
// Pass a hint for when the invalid suffix starts valid.
let valid_start_len = if suffix.starts_with('%') {
Some(1)
} else if matches!(
suffix.get(0..2),
Some("pt" | "mm" | "cm" | "in" | "em" | "fr",)
) {
Some(2)
} else if matches!(suffix.get(0..3), Some("deg" | "rad")) {
Some(3)
} else {
None
};
let maybe_hint = valid_start_len.map(|len| {
eco_format!("try adding a space after: `{}`", &suffix[0..len])
});
Some(Err(maybe_hint))
}
};
if !matches!( // Return our number or write an error with helpful hints.
suffix, match (number_result, maybe_suffix_result) {
"pt" | "mm" | "cm" | "in" | "deg" | "rad" | "em" | "fr" | "%" // Valid numbers :D
) { (Ok(None), Some(Ok(()))) => SyntaxKind::Numeric,
return self.error(eco_format!("invalid number suffix: {}", suffix)); (Ok(_), None) if is_float => SyntaxKind::Float,
(Ok(_), None) => SyntaxKind::Int,
// Invalid numbers :(
(Ok(Some(decimal_value)), Some(suffix_res)) => {
let name = match base {
2 => "binary",
8 => "octal",
16 => "hexadecimal",
_ => unreachable!(),
};
let err = self.error(eco_format!("{name} numbers cannot have a suffix"));
if let Err(maybe_hint) = suffix_res {
self.hint(eco_format!("invalid number suffix: {suffix}"));
if let Some(h) = maybe_hint {
self.hint(h);
}
} else {
self.hint(eco_format!(
"try using a decimal number: {decimal_value}{suffix}"
));
}
err
}
(Ok(None), Some(Err(maybe_hint))) => {
let err = self.error(eco_format!("invalid number suffix: {suffix}"));
if let Some(h) = maybe_hint {
self.hint(h);
}
err
}
(Err(message), Some(Err(maybe_hint))) => {
let err = self.error(message);
self.hint(eco_format!("invalid number suffix: {suffix}"));
if let Some(h) = maybe_hint {
self.hint(h);
}
err
}
(Err(message), None | Some(Ok(()))) => self.error(message),
} }
if base != 10 {
let kind = self.error(eco_format!("invalid base-{base} prefix"));
self.hint("numbers with a unit cannot have a base prefix");
return kind;
}
SyntaxKind::Numeric
} }
fn string(&mut self) -> SyntaxKind { fn string(&mut self) -> SyntaxKind {

View File

@ -107,11 +107,11 @@
#123.E // this is a field access, so is fine syntactically #123.E // this is a field access, so is fine syntactically
#0.e #0.e
#1.E+020 #1.E+020
// Error: 2-10 invalid number: 123.456e // Error: 2-10 invalid floating point number: 123.456e
#123.456e #123.456e
// Error: 2-11 invalid number: 123.456e+ // Error: 2-11 invalid floating point number: 123.456e+
#123.456e+ #123.456e+
// Error: 2-6 invalid number: .1E- // Error: 2-6 invalid floating point number: .1E-
#.1E- #.1E-
// Error: 2-4 invalid number: 0e // Error: 2-4 invalid floating point number: 0e
#0e #0e

View File

@ -75,11 +75,25 @@
// Hint: 2-24 or use `length.abs.inches()` instead to ignore its em component // Hint: 2-24 or use `length.abs.inches()` instead to ignore its em component
#(4.5em + 6in).inches() #(4.5em + 6in).inches()
--- issue-5519-length-base --- --- issue-5519-nondecimal-suffix ---
// Error: 2-9 invalid base-2 prefix // Error: 2-9 binary numbers cannot have a suffix
// Hint: 2-9 numbers with a unit cannot have a base prefix // Hint: 2-9 try using a decimal number: 4pt
#0b100pt #0b100pt
--- nondecimal-suffix-edge-cases ---
// Error: 2-7 octal numbers cannot have a suffix
// Hint: 2-7 try using a decimal number: 50%
#0o62%
// Error: 2-8 hexadecimal numbers cannot have a suffix
// Hint: 2-8 try using a decimal number: 2748%
#0xabc%
// Error: 2-9 invalid hexadecimal number: 0xabcem
#0xabcem
// Error: 2-11 binary numbers cannot have a suffix
// Hint: 2-11 invalid number suffix: dag
#0b0101dag
--- number-syntax-edge-cases --- --- number-syntax-edge-cases ---
// Test numeric syntax edge cases with suffixes and which spans of text are // Test numeric syntax edge cases with suffixes and which spans of text are
// highlighted. Valid items are those not annotated with an error comment since // highlighted. Valid items are those not annotated with an error comment since
@ -92,17 +106,23 @@
#1.2E+0% #1.2E+0%
#1.2e-0% #1.2e-0%
#0.0e0deg #0.0e0deg
#5in%
#0.% #0.%
#5in%
// Error: 2-8 invalid number suffix: hello // Error: 2-8 invalid number suffix: hello
#1hello #1hello
// Error: 2-7 invalid number suffix: infr // Error: 2-7 invalid number suffix: infr
// Hint: 2-7 try adding a space after: `in`
#1infr #1infr
// Error: 2-5 invalid number: 2E // Error: 2-5 invalid floating point number: 2E
// Hint: 2-5 invalid number suffix: M
#2EM #2EM
// Error: 2-8 invalid number: .1E- // Error: 2-8 invalid floating point number: .1E-
#.1E-fr #.1E-fr
// Error: 2-16 invalid number: 0.1E+ // Error: 2-16 invalid floating point number: 0.1E+
// Hint: 2-16 invalid number suffix: fr123e456
// Hint: 2-16 try adding a space after: `fr`
#0.1E+fr123e456 #0.1E+fr123e456
// Error: 2-11 invalid number: .1e- // Error: 2-11 invalid floating point number: .1e-
// Hint: 2-11 invalid number suffix: fr123
// Hint: 2-11 try adding a space after: `fr`
#.1e-fr123.456 #.1e-fr123.456