Added escaping to ActiveEnum to allow for non-UAX#31 chars, as well as camel_case related conflicts. (#1374)
* Added escaping to ActiveEnum to allow for non-Unicode Standard Annex #31 characters, as well as addressing issues with ' ' and '_' causing potential identifier conflicts. * Improved docstring for camel_case_with_escaped_non_xid. * Moved underscore prepending to camel_case_with_escaped_non_xid. * cargo fmt and code fixes to resolve CI failures. * Added unittest to sea_orm_macros::util for new function. * Fixed a typo in a doc code block. * clippy * Test cases --------- Co-authored-by: Billy Chan <ccw.billy.123@gmail.com>
This commit is contained in:
parent
16015cc35d
commit
5ef273d5e4
@ -23,9 +23,10 @@ syn = { version = "1", default-features = false, features = ["parsing", "proc-ma
|
||||
quote = { version = "1", default-features = false }
|
||||
heck = { version = "0.4", default-features = false }
|
||||
proc-macro2 = { version = "1", default-features = false }
|
||||
unicode-ident = { version = "1" }
|
||||
|
||||
[dev-dependencies]
|
||||
sea-orm = { path = "../", features = ["macros"] }
|
||||
sea-orm = { path = "../", features = ["macros", "tests-cfg"] }
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
|
||||
[features]
|
||||
|
@ -1,3 +1,4 @@
|
||||
use super::util::camel_case_with_escaped_non_uax31;
|
||||
use heck::ToUpperCamelCase;
|
||||
use proc_macro2::TokenStream;
|
||||
use quote::{format_ident, quote, quote_spanned};
|
||||
@ -247,11 +248,9 @@ impl ActiveEnum {
|
||||
let enum_variants: Vec<syn::Ident> = str_variants
|
||||
.iter()
|
||||
.map(|v| {
|
||||
if v.chars().next().map(char::is_numeric).unwrap_or(false) {
|
||||
format_ident!("_{}", v)
|
||||
} else {
|
||||
format_ident!("{}", v.to_upper_camel_case())
|
||||
}
|
||||
let v_cleaned = camel_case_with_escaped_non_uax31(v);
|
||||
|
||||
format_ident!("{}", v_cleaned)
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
@ -1,3 +1,4 @@
|
||||
use heck::ToUpperCamelCase;
|
||||
use quote::format_ident;
|
||||
use syn::{punctuated::Punctuated, token::Comma, Field, Ident, Meta};
|
||||
|
||||
@ -54,6 +55,97 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn a string to PascalCase while escaping all special characters in ASCII words.
|
||||
///
|
||||
/// (camel_case is used here to match naming of heck.)
|
||||
///
|
||||
/// In ActiveEnum, string_value will be PascalCased and made
|
||||
/// an identifier in {Enum}Variant.
|
||||
///
|
||||
/// However Rust only allows for XID_Start char followed by
|
||||
/// XID_Continue characters as identifiers; this causes a few
|
||||
/// problems:
|
||||
///
|
||||
/// - `string_value = ""` will cause a panic;
|
||||
/// - `string_value` containing only non-alphanumerics will become `""`
|
||||
/// and cause the above panic;
|
||||
/// - `string_values`:
|
||||
/// - `"A B"`
|
||||
/// - `"A B"`
|
||||
/// - `"A_B"`
|
||||
/// - `"A_ B"`
|
||||
/// shares the same identifier of `"AB"`;
|
||||
///
|
||||
/// This function does the PascelCase conversion with a few special escapes:
|
||||
/// - Non-Unicode Standard Annex #31 compliant characters will converted to their hex notation;
|
||||
/// - `"_"` into `"0x5F"`;
|
||||
/// - `" "` into `"0x20"`;
|
||||
/// - Empty strings will become special keyword of `"__Empty"`
|
||||
///
|
||||
/// Note that this does NOT address:
|
||||
///
|
||||
/// - case-sensitivity. String value "ABC" and "abc" remains
|
||||
/// conflicted after .camel_case().
|
||||
///
|
||||
/// Example Conversions:
|
||||
///
|
||||
/// ```ignore
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31(""), "__Empty");
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31(" "), "_0x20");
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31(" "), "_0x200x20");
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31("_"), "_0x5F");
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31("foobar"), "Foobar");
|
||||
/// assert_eq!(camel_case_with_escaped_non_uax31("foo bar"), "Foo0x20bar");
|
||||
/// ```
|
||||
pub(crate) fn camel_case_with_escaped_non_uax31<T>(string: T) -> String
|
||||
where
|
||||
T: ToString,
|
||||
{
|
||||
let additional_chars_to_replace: [char; 2] = ['_', ' '];
|
||||
|
||||
let mut rebuilt = string
|
||||
.to_string()
|
||||
.chars()
|
||||
.enumerate()
|
||||
.map(|(pos, char_)| {
|
||||
if !additional_chars_to_replace.contains(&char_)
|
||||
&& match pos {
|
||||
0 => unicode_ident::is_xid_start(char_),
|
||||
_ => unicode_ident::is_xid_continue(char_),
|
||||
}
|
||||
{
|
||||
char_.to_string()
|
||||
} else {
|
||||
format!("{:#X}", char_ as u32)
|
||||
}
|
||||
})
|
||||
.reduce(
|
||||
// Join the "characters" (now strings)
|
||||
// back together
|
||||
|lhs, rhs| lhs + rhs.as_str(),
|
||||
)
|
||||
.map_or(
|
||||
// if string_value is ""
|
||||
// Make sure the default does NOT go through camel_case,
|
||||
// as the __ will be removed! The underscores are
|
||||
// what guarantees this being special case avoiding
|
||||
// all potential conflicts.
|
||||
String::from("__Empty"),
|
||||
|s| s.to_upper_camel_case(),
|
||||
);
|
||||
|
||||
if rebuilt
|
||||
.chars()
|
||||
.next()
|
||||
.map(char::is_numeric)
|
||||
.unwrap_or(false)
|
||||
{
|
||||
rebuilt = String::from("_") + &rebuilt;
|
||||
}
|
||||
|
||||
rebuilt
|
||||
}
|
||||
|
||||
pub(crate) const RAW_IDENTIFIER: &str = "r#";
|
||||
|
||||
pub(crate) const RUST_KEYWORDS: [&str; 49] = [
|
||||
@ -65,3 +157,42 @@ pub(crate) const RUST_KEYWORDS: [&str; 49] = [
|
||||
];
|
||||
|
||||
pub(crate) const RUST_SPECIAL_KEYWORDS: [&str; 3] = ["crate", "Self", "self"];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_non_uax31_escape() {
|
||||
// Test empty string
|
||||
assert_eq!(camel_case_with_escaped_non_uax31(""), "__Empty");
|
||||
|
||||
// Test additional_chars_to_replace (to_camel_case related characters)
|
||||
assert_eq!(camel_case_with_escaped_non_uax31(" "), "_0x20");
|
||||
|
||||
// Test additional_chars_to_replace (multiples. ensure distinct from single)
|
||||
assert_eq!(camel_case_with_escaped_non_uax31(" "), "_0x200x20");
|
||||
|
||||
// Test additional_chars_to_replace (udnerscores)
|
||||
assert_eq!(camel_case_with_escaped_non_uax31("_"), "_0x5F");
|
||||
|
||||
// Test typical use case
|
||||
assert_eq!(camel_case_with_escaped_non_uax31("foobar"), "Foobar");
|
||||
|
||||
// Test spaced words distinct from non-spaced
|
||||
assert_eq!(camel_case_with_escaped_non_uax31("foo bar"), "Foo0x20bar");
|
||||
|
||||
// Test undescored words distinct from non-spaced and spaced
|
||||
assert_eq!(camel_case_with_escaped_non_uax31("foo_bar"), "Foo0x5Fbar");
|
||||
|
||||
// Test leading numeric characters
|
||||
assert_eq!(camel_case_with_escaped_non_uax31("1"), "_0x31");
|
||||
|
||||
// Test escaping also works on full string following lead numeric character
|
||||
// This was previously a fail condition.
|
||||
assert_eq!(
|
||||
camel_case_with_escaped_non_uax31("1 2 3"),
|
||||
"_0x310x2020x203"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -548,6 +548,9 @@ pub fn derive_active_model_behavior(input: TokenStream) -> TokenStream {
|
||||
/// - For enum variant
|
||||
/// - `string_value` or `num_value`:
|
||||
/// - For `string_value`, value should be passed as string, i.e. `string_value = "A"`
|
||||
/// - Due to the way internal Enums are automatically derived, the following restrictions apply:
|
||||
/// - members cannot share identical `string_value`, case-insensitive.
|
||||
/// - in principle, any future Titlecased Rust keywords are not valid `string_value`.
|
||||
/// - For `num_value`, value should be passed as integer, i.e. `num_value = 1` or `num_value = 1i32`
|
||||
/// - Note that only one of it can be specified, and all variants of an enum have to annotate with the same `*_value` macro attribute
|
||||
///
|
||||
|
@ -408,4 +408,89 @@ mod tests {
|
||||
test_fallback_uint!(U32Fallback, u32, "u32", "Integer", Integer);
|
||||
test_fallback_uint!(U64Fallback, u64, "u64", "BigInteger", BigInteger);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escaped_non_uax31() {
|
||||
#[derive(Debug, Clone, PartialEq, Eq, EnumIter, DeriveActiveEnum, Copy)]
|
||||
#[sea_orm(rs_type = "String", db_type = "Enum", enum_name = "pop_os_names_typos")]
|
||||
pub enum PopOSTypos {
|
||||
#[sea_orm(string_value = "Pop!_OS")]
|
||||
PopOSCorrect,
|
||||
#[sea_orm(string_value = "Pop\u{2757}_OS")]
|
||||
PopOSEmoji,
|
||||
#[sea_orm(string_value = "Pop!_操作系统")]
|
||||
PopOSChinese,
|
||||
#[sea_orm(string_value = "PopOS")]
|
||||
PopOSASCIIOnly,
|
||||
#[sea_orm(string_value = "Pop OS")]
|
||||
PopOSASCIIOnlyWithSpace,
|
||||
#[sea_orm(string_value = "Pop!OS")]
|
||||
PopOSNoUnderscore,
|
||||
#[sea_orm(string_value = "Pop_OS")]
|
||||
PopOSNoExclaimation,
|
||||
#[sea_orm(string_value = "!PopOS_")]
|
||||
PopOSAllOverThePlace,
|
||||
#[sea_orm(string_value = "Pop!_OS22.04LTS")]
|
||||
PopOSWithVersion,
|
||||
#[sea_orm(string_value = "22.04LTSPop!_OS")]
|
||||
PopOSWithVersionPrefix,
|
||||
#[sea_orm(string_value = "!_")]
|
||||
PopOSJustTheSymbols,
|
||||
#[sea_orm(string_value = "")]
|
||||
Nothing,
|
||||
// This WILL fail:
|
||||
// Both PopOs and PopOS will create identifier "Popos"
|
||||
// #[sea_orm(string_value = "PopOs")]
|
||||
// PopOSLowerCase,
|
||||
}
|
||||
let values = [
|
||||
"Pop!_OS",
|
||||
"Pop\u{2757}_OS",
|
||||
"Pop!_操作系统",
|
||||
"PopOS",
|
||||
"Pop OS",
|
||||
"Pop!OS",
|
||||
"Pop_OS",
|
||||
"!PopOS_",
|
||||
"Pop!_OS22.04LTS",
|
||||
"22.04LTSPop!_OS",
|
||||
"!_",
|
||||
"",
|
||||
];
|
||||
for (variant, val) in PopOSTypos::iter().zip(values) {
|
||||
assert_eq!(variant.to_value(), val);
|
||||
assert_eq!(PopOSTypos::try_from_value(&val.to_owned()), Ok(variant));
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, EnumIter, DeriveActiveEnum)]
|
||||
#[sea_orm(
|
||||
rs_type = "String",
|
||||
db_type = "String(None)",
|
||||
enum_name = "conflicting_string_values"
|
||||
)]
|
||||
pub enum ConflictingStringValues {
|
||||
#[sea_orm(string_value = "")]
|
||||
Member1,
|
||||
#[sea_orm(string_value = "$")]
|
||||
Member2,
|
||||
#[sea_orm(string_value = "$$")]
|
||||
Member3,
|
||||
#[sea_orm(string_value = "AB")]
|
||||
Member4,
|
||||
#[sea_orm(string_value = "A_B")]
|
||||
Member5,
|
||||
#[sea_orm(string_value = "A$B")]
|
||||
Member6,
|
||||
#[sea_orm(string_value = "0 123")]
|
||||
Member7,
|
||||
}
|
||||
type EnumVariant = ConflictingStringValuesVariant;
|
||||
assert_eq!(EnumVariant::__Empty.to_string(), "");
|
||||
assert_eq!(EnumVariant::_0x24.to_string(), "$");
|
||||
assert_eq!(EnumVariant::_0x240x24.to_string(), "$$");
|
||||
assert_eq!(EnumVariant::Ab.to_string(), "AB");
|
||||
assert_eq!(EnumVariant::A0x5Fb.to_string(), "A_B");
|
||||
assert_eq!(EnumVariant::A0x24B.to_string(), "A$B");
|
||||
assert_eq!(EnumVariant::_0x300x20123.to_string(), "0 123");
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user